You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2915 lines
75 KiB
2915 lines
75 KiB
6 months ago
|
"""
|
||
|
This module contains a set of functions for vectorized string
|
||
|
operations and methods.
|
||
|
|
||
|
.. note::
|
||
|
The `chararray` class exists for backwards compatibility with
|
||
|
Numarray, it is not recommended for new development. Starting from numpy
|
||
|
1.4, if one needs arrays of strings, it is recommended to use arrays of
|
||
|
`dtype` `object_`, `bytes_` or `str_`, and use the free functions
|
||
|
in the `numpy.char` module for fast vectorized string operations.
|
||
|
|
||
|
Some methods will only be available if the corresponding string method is
|
||
|
available in your version of Python.
|
||
|
|
||
|
The preferred alias for `defchararray` is `numpy.char`.
|
||
|
|
||
|
"""
|
||
|
import functools
|
||
|
|
||
|
from .._utils import set_module
|
||
|
from .numerictypes import (
|
||
|
bytes_, str_, integer, int_, object_, bool_, character)
|
||
|
from .numeric import ndarray, compare_chararrays
|
||
|
from .numeric import array as narray
|
||
|
from numpy.core.multiarray import _vec_string
|
||
|
from numpy.core import overrides
|
||
|
from numpy.compat import asbytes
|
||
|
import numpy
|
||
|
|
||
|
# Names exported by ``from <this module> import *``.
__all__ = [
    'equal', 'not_equal', 'greater_equal', 'less_equal',
    'greater', 'less', 'str_len', 'add', 'multiply', 'mod', 'capitalize',
    'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs',
    'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace',
    'istitle', 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'partition',
    'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit',
    'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase',
    'title', 'translate', 'upper', 'zfill', 'isnumeric', 'isdecimal',
    'array', 'asarray'
    ]


# NOTE(review): not referenced in the visible part of this file; its purpose
# cannot be determined from here -- confirm before touching it.
_globalvar = 0

# The generic dispatch decorator with the public module name pre-bound to
# 'numpy.char'.
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy.char')
|
||
|
|
||
|
|
||
|
def _is_unicode(arr):
|
||
|
"""Returns True if arr is a string or a string array with a dtype that
|
||
|
represents a unicode string, otherwise returns False.
|
||
|
|
||
|
"""
|
||
|
if (isinstance(arr, str) or
|
||
|
issubclass(numpy.asarray(arr).dtype.type, str)):
|
||
|
return True
|
||
|
return False
|
||
|
|
||
|
|
||
|
def _to_bytes_or_str_array(result, output_dtype_like=None):
|
||
|
"""
|
||
|
Helper function to cast a result back into an array
|
||
|
with the appropriate dtype if an object array must be used
|
||
|
as an intermediary.
|
||
|
"""
|
||
|
ret = numpy.asarray(result.tolist())
|
||
|
dtype = getattr(output_dtype_like, 'dtype', None)
|
||
|
if dtype is not None:
|
||
|
return ret.astype(type(dtype)(_get_num_chars(ret)), copy=False)
|
||
|
return ret
|
||
|
|
||
|
|
||
|
def _clean_args(*args):
|
||
|
"""
|
||
|
Helper function for delegating arguments to Python string
|
||
|
functions.
|
||
|
|
||
|
Many of the Python string operations that have optional arguments
|
||
|
do not use 'None' to indicate a default value. In these cases,
|
||
|
we need to remove all None arguments, and those following them.
|
||
|
"""
|
||
|
newargs = []
|
||
|
for chk in args:
|
||
|
if chk is None:
|
||
|
break
|
||
|
newargs.append(chk)
|
||
|
return newargs
|
||
|
|
||
|
def _get_num_chars(a):
|
||
|
"""
|
||
|
Helper function that returns the number of characters per field in
|
||
|
a string or unicode array. This is to abstract out the fact that
|
||
|
for a unicode array this is itemsize / 4.
|
||
|
"""
|
||
|
if issubclass(a.dtype.type, str_):
|
||
|
return a.itemsize // 4
|
||
|
return a.itemsize
|
||
|
|
||
|
|
||
|
def _binary_op_dispatcher(x1, x2):
|
||
|
return (x1, x2)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_binary_op_dispatcher)
def equal(x1, x2):
    """
    Test ``x1 == x2`` element-wise.

    In contrast to `numpy.equal`, trailing whitespace is stripped from
    the strings before they are compared.  This is kept for backward
    compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    not_equal, greater_equal, less_equal, greater, less
    """
    operator, strip_trailing = '==', True
    return compare_chararrays(x1, x2, operator, strip_trailing)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_binary_op_dispatcher)
def not_equal(x1, x2):
    """
    Test ``x1 != x2`` element-wise.

    In contrast to `numpy.not_equal`, trailing whitespace is stripped
    from the strings before they are compared.  This is kept for
    backward compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, greater_equal, less_equal, greater, less
    """
    operator, strip_trailing = '!=', True
    return compare_chararrays(x1, x2, operator, strip_trailing)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_binary_op_dispatcher)
def greater_equal(x1, x2):
    """
    Test ``x1 >= x2`` element-wise.

    In contrast to `numpy.greater_equal`, trailing whitespace is
    stripped from the strings before they are compared.  This is kept
    for backward compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, not_equal, less_equal, greater, less
    """
    operator, strip_trailing = '>=', True
    return compare_chararrays(x1, x2, operator, strip_trailing)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_binary_op_dispatcher)
def less_equal(x1, x2):
    """
    Test ``x1 <= x2`` element-wise.

    In contrast to `numpy.less_equal`, trailing whitespace is stripped
    from the strings before they are compared.  This is kept for
    backward compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, not_equal, greater_equal, greater, less
    """
    operator, strip_trailing = '<=', True
    return compare_chararrays(x1, x2, operator, strip_trailing)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_binary_op_dispatcher)
def greater(x1, x2):
    """
    Test ``x1 > x2`` element-wise.

    In contrast to `numpy.greater`, trailing whitespace is stripped from
    the strings before they are compared.  This is kept for backward
    compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, less
    """
    operator, strip_trailing = '>', True
    return compare_chararrays(x1, x2, operator, strip_trailing)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_binary_op_dispatcher)
def less(x1, x2):
    """
    Return (x1 < x2) element-wise.

    Unlike `numpy.less`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, greater
    """
    # The final argument enables stripping of trailing whitespace.
    return compare_chararrays(x1, x2, '<', True)
|
||
|
|
||
|
|
||
|
def _unary_op_dispatcher(a):
|
||
|
return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_unary_op_dispatcher)
def str_len(a):
    """
    Compute ``len(a)`` for every element.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of integers

    See Also
    --------
    len

    Examples
    --------
    >>> a = np.array(['Grace Hopper Conference', 'Open Source Day'])
    >>> np.char.str_len(a)
    array([23, 15])
    >>> a = np.array([u'\u0420', u'\u043e'])
    >>> np.char.str_len(a)
    array([1, 1])
    >>> a = np.array([['hello', 'world'], [u'\u0420', u'\u043e']])
    >>> np.char.str_len(a)
    array([[5, 5], [1, 1]])
    """
    # `__len__` and friends currently produce Python ints, not C integers.
    # `intp` would normally be expected for lengths, but plain `int_` is
    # enough here because the dtype itemsize limits the length.
    lengths = _vec_string(a, int_, '__len__')
    return lengths
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_binary_op_dispatcher)
def add(x1, x2):
    """
    Concatenate two arrays of str or unicode element-wise.

    Arrays `x1` and `x2` must have the same shape.

    Parameters
    ----------
    x1 : array_like of str or unicode
        Input array.
    x2 : array_like of str or unicode
        Input array.

    Returns
    -------
    add : ndarray
        Output array of `bytes_` or `str_`, depending on input types
        of the same shape as `x1` and `x2`.

    Raises
    ------
    TypeError
        If the dtypes of the two inputs are not of the same dtype class.

    """
    arr1, arr2 = numpy.asarray(x1), numpy.asarray(x2)
    # Each result must be able to hold both operands back to back.
    out_size = _get_num_chars(arr1) + _get_num_chars(arr2)
    dtype_cls = type(arr1.dtype)

    if dtype_cls != type(arr2.dtype):
        # Enforced for now; the proper solution is to implement add as a
        # ufunc.  Mixed inputs never worked right on Python 3: bytes +
        # unicode gave nonsense, unicode + bytes errored, and unicode +
        # object used the object dtype itemsize as the number of chars
        # (which only worked for short strings).  bytes + void worked, but
        # promoting void->bytes is dubious as well.
        raise TypeError(
            "np.char.add() requires both arrays of the same dtype kind, but "
            f"got dtypes: '{arr1.dtype}' and '{arr2.dtype}' (the few cases "
            "where this used to work often lead to incorrect results).")

    out_dtype = dtype_cls(out_size)
    return _vec_string(arr1, out_dtype, '__add__', (arr2,))
|
||
|
|
||
|
def _multiply_dispatcher(a, i):
|
||
|
return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_multiply_dispatcher)
def multiply(a, i):
    """
    Return (a * i), that is string multiple concatenation,
    element-wise.

    Values in `i` of less than 0 are treated as 0 (which yields an
    empty string).

    Parameters
    ----------
    a : array_like of str or unicode

    i : array_like of ints

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    Raises
    ------
    ValueError
        If `i` does not have an integer dtype.

    Examples
    --------
    >>> a = np.array(["a", "b", "c"])
    >>> np.char.multiply(a, 3)
    array(['aaa', 'bbb', 'ccc'], dtype='<U3')
    >>> i = np.array([1, 2, 3])
    >>> np.char.multiply(a, i)
    array(['a', 'bb', 'ccc'], dtype='<U3')
    >>> np.char.multiply(np.array(['a']), i)
    array(['a', 'aa', 'aaa'], dtype='<U3')
    >>> a = np.array(['a', 'b', 'c', 'd', 'e', 'f']).reshape((2, 3))
    >>> np.char.multiply(a, 3)
    array([['aaa', 'bbb', 'ccc'],
           ['ddd', 'eee', 'fff']], dtype='<U3')
    >>> np.char.multiply(a, i)
    array([['a', 'bb', 'ccc'],
           ['d', 'ee', 'fff']], dtype='<U3')
    """
    a_arr = numpy.asarray(a)
    i_arr = numpy.asarray(i)
    if not issubclass(i_arr.dtype.type, integer):
        raise ValueError("Can only multiply by integers")
    # Size the output for the largest repeat count; a negative maximum is
    # clamped to zero.
    out_size = _get_num_chars(a_arr) * max(int(i_arr.max()), 0)
    return _vec_string(
        a_arr, type(a_arr.dtype)(out_size), '__mul__', (i_arr,))
|
||
|
|
||
|
|
||
|
def _mod_dispatcher(a, values):
|
||
|
return (a, values)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_mod_dispatcher)
def mod(a, values):
    """
    Apply ``a % values`` element-wise, i.e. pre-Python 2.6 string
    formatting (interpolation), for a pair of array_likes of str or
    unicode.

    Parameters
    ----------
    a : array_like of str or unicode

    values : array_like of values
       These values will be element-wise interpolated into the string.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    See Also
    --------
    str.__mod__

    """
    # Format through an object array first, then restore a string dtype
    # modelled on `a`.
    formatted = _vec_string(a, object_, '__mod__', (values,))
    return _to_bytes_or_str_array(formatted, a)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_unary_op_dispatcher)
def capitalize(a):
    """
    Return a copy of `a` with only the first character of each element
    capitalized.

    Calls `str.capitalize` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array of strings to capitalize.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input
        types

    See Also
    --------
    str.capitalize

    Examples
    --------
    >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c
    array([b'a1b2', b'1b2a', b'b2a1', b'2a1b'], dtype='|S4')
    >>> np.char.capitalize(c)
    array([b'A1b2', b'1b2a', b'B2a1', b'2a1b'], dtype='|S4')

    """
    a_arr = numpy.asarray(a)
    # The result is requested with the input's own dtype.
    return _vec_string(a_arr, a_arr.dtype, 'capitalize')
|
||
|
|
||
|
|
||
|
def _center_dispatcher(a, width, fillchar=None):
|
||
|
return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_center_dispatcher)
def center(a, width, fillchar=' '):
    """
    Center every element of `a` in a string of length `width` and
    return the result as a new array.

    Calls `str.center` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        The length of the resulting strings
    fillchar : str or unicode, optional
        The padding character to use (default is space).

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input
        types

    See Also
    --------
    str.center

    Notes
    -----
    This function is intended to work with arrays of strings.  The
    fill character is not applied to numeric types.

    Examples
    --------
    >>> c = np.array(['a1b2','1b2a','b2a1','2a1b']); c
    array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='<U4')
    >>> np.char.center(c, width=9)
    array(['   a1b2  ', '   1b2a  ', '   b2a1  ', '   2a1b  '], dtype='<U9')
    >>> np.char.center(c, width=9, fillchar='*')
    array(['***a1b2**', '***1b2a**', '***b2a1**', '***2a1b**'], dtype='<U9')
    >>> np.char.center(c, width=1)
    array(['a', '1', 'b', '2'], dtype='<U1')

    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output dtype is sized by the largest requested width.
    out_chars = int(numpy.max(widths.flat))
    if numpy.issubdtype(strings.dtype, numpy.bytes_):
        # Byte strings need the fill character as bytes too.
        fillchar = asbytes(fillchar)
    out_dtype = type(strings.dtype)(out_chars)
    return _vec_string(strings, out_dtype, 'center', (widths, fillchar))
|
||
|
|
||
|
|
||
|
def _count_dispatcher(a, sub, start=None, end=None):
|
||
|
return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_count_dispatcher)
def count(a, sub, start=0, end=None):
    """
    Count, for each element, the non-overlapping occurrences of
    substring `sub` in the range [`start`, `end`].

    Calls `str.count` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sub : str or unicode
       The substring to search for.

    start, end : int, optional
       Optional arguments `start` and `end` are interpreted as slice
       notation to specify the range in which to count.

    Returns
    -------
    out : ndarray
        Output array of ints.

    See Also
    --------
    str.count

    Examples
    --------
    >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
    >>> c
    array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
    >>> np.char.count(c, 'A')
    array([3, 1, 1])
    >>> np.char.count(c, 'aA')
    array([3, 1, 0])
    >>> np.char.count(c, 'A', start=1, end=4)
    array([2, 1, 1])
    >>> np.char.count(c, 'A', start=1, end=3)
    array([1, 0, 0])

    """
    # `end` is only forwarded when it is not None.
    call_args = [sub, start]
    call_args.extend(_clean_args(end))
    return _vec_string(a, int_, 'count', call_args)
|
||
|
|
||
|
|
||
|
def _code_dispatcher(a, encoding=None, errors=None):
|
||
|
return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_code_dispatcher)
def decode(a, encoding=None, errors=None):
    r"""
    Calls ``bytes.decode`` element-wise.

    The set of available codecs comes from the Python standard library,
    and may be extended at runtime.  For more information, see the
    :mod:`codecs` module.

    Parameters
    ----------
    a : array_like of bytes

    encoding : str, optional
       The name of an encoding

    errors : str, optional
       Specifies how to handle encoding errors

    Returns
    -------
    out : ndarray

    See Also
    --------
    :py:meth:`bytes.decode`

    Notes
    -----
    The type of the result will depend on the encoding specified.

    Examples
    --------
    >>> c = np.array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
    ...               b'\x81\x82\xc2\xc1\xc2\x82\x81'])
    >>> c
    array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
           b'\x81\x82\xc2\xc1\xc2\x82\x81'], dtype='|S7')
    >>> np.char.decode(c, encoding='cp037')
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')

    """
    # `encoding`/`errors` are forwarded only up to the first None; the
    # object-array result is then converted back to a string dtype.
    return _to_bytes_or_str_array(
        _vec_string(a, object_, 'decode', _clean_args(encoding, errors)))
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_code_dispatcher)
def encode(a, encoding=None, errors=None):
    """
    Encode every element by calling `str.encode` on it.

    The set of available codecs comes from the Python standard library,
    and may be extended at runtime.  For more information, see the codecs
    module.

    Parameters
    ----------
    a : array_like of str or unicode

    encoding : str, optional
       The name of an encoding

    errors : str, optional
       Specifies how to handle encoding errors

    Returns
    -------
    out : ndarray

    See Also
    --------
    str.encode

    Notes
    -----
    The type of the result will depend on the encoding specified.

    """
    # Arguments from the first None onwards are not forwarded.
    codec_args = _clean_args(encoding, errors)
    encoded = _vec_string(a, object_, 'encode', codec_args)
    return _to_bytes_or_str_array(encoded)
|
||
|
|
||
|
|
||
|
def _endswith_dispatcher(a, suffix, start=None, end=None):
|
||
|
return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_endswith_dispatcher)
def endswith(a, suffix, start=0, end=None):
    """
    Return a boolean array that is `True` where the string element
    in `a` ends with `suffix` and `False` elsewhere.

    Calls `str.endswith` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    suffix : str

    start, end : int, optional
        With optional `start`, test beginning at that position. With
        optional `end`, stop comparing at that position.

    Returns
    -------
    out : ndarray
        Outputs an array of bools.

    See Also
    --------
    str.endswith

    Examples
    --------
    >>> s = np.array(['foo', 'bar'])
    >>> s
    array(['foo', 'bar'], dtype='<U3')
    >>> np.char.endswith(s, 'ar')
    array([False,  True])
    >>> np.char.endswith(s, 'a', start=1, end=2)
    array([False,  True])

    """
    # `end` is only forwarded when it is not None.
    call_args = [suffix, start]
    call_args.extend(_clean_args(end))
    return _vec_string(a, bool_, 'endswith', call_args)
|
||
|
|
||
|
|
||
|
def _expandtabs_dispatcher(a, tabsize=None):
|
||
|
return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_expandtabs_dispatcher)
def expandtabs(a, tabsize=8):
    """
    Return a copy of each string element where all tab characters are
    replaced by one or more spaces.

    Calls `str.expandtabs` element-wise.

    The number of spaces inserted depends on the current column and the
    given `tabsize`.  The column number is reset to zero after each
    newline occurring in the string.  Other non-printing characters and
    escape sequences are not understood.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array
    tabsize : int, optional
        Replace tabs with `tabsize` number of spaces.  If not given defaults
        to 8 spaces.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.expandtabs

    """
    # Expand through an object array, then restore a string dtype modelled
    # on `a`.
    expanded = _vec_string(a, object_, 'expandtabs', (tabsize,))
    return _to_bytes_or_str_array(expanded, a)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_count_dispatcher)
def find(a, sub, start=0, end=None):
    """
    For each element, return the lowest index in the string where
    substring `sub` is found, such that `sub` is contained in the
    range [`start`, `end`].

    Calls `str.find` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sub : str or unicode

    start, end : int, optional
        Optional arguments `start` and `end` are interpreted as in
        slice notation.

    Returns
    -------
    out : ndarray or int
        Output array of ints.  Returns -1 if `sub` is not found.

    See Also
    --------
    str.find

    Examples
    --------
    >>> a = np.array(["NumPy is a Python library"])
    >>> np.char.find(a, "Python", start=0, end=None)
    array([11])

    """
    # `end` is only forwarded when it is not None.
    call_args = [sub, start]
    call_args.extend(_clean_args(end))
    return _vec_string(a, int_, 'find', call_args)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_count_dispatcher)
def index(a, sub, start=0, end=None):
    """
    Like `find`, but raises `ValueError` when the substring is not found.

    Calls `str.index` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sub : str or unicode

    start, end : int, optional

    Returns
    -------
    out : ndarray
        Output array of ints.

    Raises
    ------
    ValueError
        If `sub` is not found in an element.

    See Also
    --------
    find, str.index

    Examples
    --------
    >>> a = np.array(["Computer Science"])
    >>> np.char.index(a, "Science", start=0, end=None)
    array([9])

    """
    # `end` is only forwarded when it is not None.
    return _vec_string(
        a, int_, 'index', [sub, start] + _clean_args(end))
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_unary_op_dispatcher)
def isalnum(a):
    """
    Returns true for each element if all characters in the string are
    alphanumeric and there is at least one character, false otherwise.

    Calls `str.isalnum` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isalnum
    """
    return _vec_string(a, bool_, 'isalnum')
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_unary_op_dispatcher)
def isalpha(a):
    """
    Test each element for being purely alphabetic: true if all
    characters in the string are alphabetic and there is at least one
    character, false otherwise.

    Calls `str.isalpha` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isalpha
    """
    flags = _vec_string(a, bool_, 'isalpha')
    return flags
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_unary_op_dispatcher)
def isdigit(a):
    """
    Test each element for consisting of digits only: true if all
    characters in the string are digits and there is at least one
    character, false otherwise.

    Calls `str.isdigit` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isdigit

    Examples
    --------
    >>> a = np.array(['a', 'b', '0'])
    >>> np.char.isdigit(a)
    array([False, False,  True])
    >>> a = np.array([['a', 'b', '0'], ['c', '1', '2']])
    >>> np.char.isdigit(a)
    array([[False, False,  True], [False,  True,  True]])
    """
    flags = _vec_string(a, bool_, 'isdigit')
    return flags
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_unary_op_dispatcher)
def islower(a):
    """
    Test each element for being lowercase: true if all cased characters
    in the string are lowercase and there is at least one cased
    character, false otherwise.

    Calls `str.islower` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.islower
    """
    flags = _vec_string(a, bool_, 'islower')
    return flags
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_unary_op_dispatcher)
def isspace(a):
    """
    Test each element for being whitespace only: true if there are only
    whitespace characters in the string and there is at least one
    character, false otherwise.

    Calls `str.isspace` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isspace
    """
    flags = _vec_string(a, bool_, 'isspace')
    return flags
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_unary_op_dispatcher)
def istitle(a):
    """
    Test each element for being titlecased: true if the element is a
    titlecased string and there is at least one character, false
    otherwise.

    Call `str.istitle` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.istitle
    """
    flags = _vec_string(a, bool_, 'istitle')
    return flags
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_unary_op_dispatcher)
def isupper(a):
    """
    Test each element for being uppercase: true if all cased characters
    in the string are uppercase and there is at least one character,
    false otherwise.

    Call `str.isupper` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isupper

    Examples
    --------
    >>> s = "GHC"
    >>> np.char.isupper(s)
    array(True)
    >>> a = np.array(["hello", "HELLO", "Hello"])
    >>> np.char.isupper(a)
    array([False,  True, False])

    """
    flags = _vec_string(a, bool_, 'isupper')
    return flags
|
||
|
|
||
|
|
||
|
def _join_dispatcher(sep, seq):
|
||
|
return (sep, seq)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_join_dispatcher)
def join(sep, seq):
    """
    Concatenate the strings in the sequence `seq`, using `sep` as the
    separator, and return the resulting string(s).

    Calls `str.join` element-wise.

    Parameters
    ----------
    sep : array_like of str or unicode
    seq : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    See Also
    --------
    str.join

    Examples
    --------
    >>> np.char.join('-', 'osd')
    array('o-s-d', dtype='<U5')

    >>> np.char.join(['-', '.'], ['ghc', 'osd'])
    array(['g-h-c', 'o.s.d'], dtype='<U5')

    """
    # Join through an object array, then restore a string dtype modelled
    # on `seq`.
    joined = _vec_string(sep, object_, 'join', (seq,))
    return _to_bytes_or_str_array(joined, seq)
|
||
|
|
||
|
|
||
|
|
||
|
def _just_dispatcher(a, width, fillchar=None):
|
||
|
return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_just_dispatcher)
def ljust(a, width, fillchar=' '):
    """
    Left-justify every element of `a` in a string of length `width`
    and return the result as an array.

    Calls `str.ljust` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        The length of the resulting strings
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.ljust

    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output dtype is sized by the largest requested width.
    out_chars = int(numpy.max(widths.flat))
    if numpy.issubdtype(strings.dtype, numpy.bytes_):
        # Byte strings need the fill character as bytes too.
        fillchar = asbytes(fillchar)
    out_dtype = type(strings.dtype)(out_chars)
    return _vec_string(strings, out_dtype, 'ljust', (widths, fillchar))
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_unary_op_dispatcher)
def lower(a):
    """
    Convert the elements of an array to lowercase.

    Call `str.lower` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.lower

    Examples
    --------
    >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
    >>> np.char.lower(c)
    array(['a1b c', '1bca', 'bca1'], dtype='<U5')

    """
    strings = numpy.asarray(a)
    # The result is requested with the input's own dtype.
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'lower')
|
||
|
|
||
|
|
||
|
def _strip_dispatcher(a, chars=None):
    # Dispatcher handed to `array_function_dispatch` by the strip family;
    # only `a` is reported as a relevant argument.
    return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_strip_dispatcher)
def lstrip(a, chars=None):
    """
    For each element in `a`, return a copy with the leading characters
    removed.

    Calls `str.lstrip` element-wise.

    Parameters
    ----------
    a : array-like, {str, unicode}
        Input array.

    chars : {str, unicode}, optional
        The `chars` argument is a string specifying the set of
        characters to be removed. If omitted or None, the `chars`
        argument defaults to removing whitespace. The `chars` argument
        is not a prefix; rather, all combinations of its values are
        stripped.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.lstrip

    Examples
    --------
    >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
    >>> c
    array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')

    The 'a' in c[1] is not stripped because it is preceded by whitespace.

    >>> np.char.lstrip(c, 'a')
    array(['AaAaA', ' aA ', 'bBABba'], dtype='<U7')


    >>> np.char.lstrip(c, 'A') # leaves c unchanged
    array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
    >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all()
    ... # XXX: is this a regression? This used to return True
    ... # np.char.lstrip(c,'') does not modify c at all.
    False
    >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all()
    True

    """
    a_arr = numpy.asarray(a)
    # `chars` is forwarded as-is, including None.
    return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,))
|
||
|
|
||
|
|
||
|
def _partition_dispatcher(a, sep):
    # Dispatcher handed to `array_function_dispatch` by `partition` and
    # `rpartition`; only `a` is reported as a relevant argument.
    return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_partition_dispatcher)
def partition(a, sep):
    """
    Partition each element in `a` around `sep`.

    Calls `str.partition` element-wise.

    For each element in `a`, split the element at the first
    occurrence of `sep`, and return 3 strings containing the part
    before the separator, the separator itself, and the part after
    the separator. If the separator is not found, return 3 strings
    containing the string itself, followed by two empty strings.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array
    sep : {str, unicode}
        Separator to split each string element in `a`.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type.
        The output array will have an extra dimension with 3
        elements per input element.

    See Also
    --------
    str.partition

    """
    # The per-element 3-tuples are collected in an object array first and
    # then converted by `_to_bytes_or_str_array`, which is also given `a`.
    return _to_bytes_or_str_array(
        _vec_string(a, object_, 'partition', (sep,)), a)
|
||
|
|
||
|
|
||
|
def _replace_dispatcher(a, old, new, count=None):
    # Dispatcher handed to `array_function_dispatch` by `replace`;
    # only `a` is reported as a relevant argument.
    return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_replace_dispatcher)
def replace(a, old, new, count=None):
    """
    For each element in `a`, return a copy of the string with all
    occurrences of substring `old` replaced by `new`.

    Calls `str.replace` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    old, new : str or unicode

    count : int, optional
        If the optional argument `count` is given, only the first
        `count` occurrences are replaced.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.replace

    Examples
    --------
    >>> a = np.array(["That is a mango", "Monkeys eat mangos"])
    >>> np.char.replace(a, 'mango', 'banana')
    array(['That is a banana', 'Monkeys eat bananas'], dtype='<U19')

    >>> a = np.array(["The dish is fresh", "This is it"])
    >>> np.char.replace(a, 'is', 'was')
    array(['The dwash was fresh', 'Thwas was it'], dtype='<U19')
    """
    # Results are gathered in an object array and then converted by
    # `_to_bytes_or_str_array`, which is also given `a`.
    return _to_bytes_or_str_array(
        _vec_string(a, object_, 'replace', [old, new] + _clean_args(count)), a)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_count_dispatcher)
def rfind(a, sub, start=0, end=None):
    """
    For each element in `a`, return the highest index in the string
    where substring `sub` is found, such that `sub` is contained
    within [`start`, `end`].

    Calls `str.rfind` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    sub : str or unicode

    start, end : int, optional
        Optional arguments `start` and `end` are interpreted as in
        slice notation.

    Returns
    -------
    out : ndarray
        Output array of ints. Return -1 on failure.

    See Also
    --------
    str.rfind

    """
    return _vec_string(
        a, int_, 'rfind', [sub, start] + _clean_args(end))
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_count_dispatcher)
def rindex(a, sub, start=0, end=None):
    """
    Like `rfind`, but raises `ValueError` when the substring `sub` is
    not found.

    Calls `str.rindex` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    sub : str or unicode

    start, end : int, optional

    Returns
    -------
    out : ndarray
        Output array of ints.

    See Also
    --------
    rfind, str.rindex

    """
    return _vec_string(
        a, int_, 'rindex', [sub, start] + _clean_args(end))
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_just_dispatcher)
def rjust(a, width, fillchar=' '):
    """
    Return an array with the elements of `a` right-justified in a
    string of length `width`.

    Calls `str.rjust` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        The length of the resulting strings
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.rjust

    """
    a_arr = numpy.asarray(a)
    width_arr = numpy.asarray(width)
    # The output item size is taken from the largest requested width.
    size = int(numpy.max(width_arr.flat))
    if numpy.issubdtype(a_arr.dtype, numpy.bytes_):
        # bytes.rjust only accepts a bytes fill character.
        fillchar = asbytes(fillchar)
    return _vec_string(
        a_arr, type(a_arr.dtype)(size), 'rjust', (width_arr, fillchar))
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_partition_dispatcher)
def rpartition(a, sep):
    """
    Partition (split) each element around the right-most separator.

    Calls `str.rpartition` element-wise.

    For each element in `a`, split the element at the last
    occurrence of `sep`, and return 3 strings containing the part
    before the separator, the separator itself, and the part after
    the separator. If the separator is not found, return 3 strings
    containing two empty strings, followed by the string itself.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array
    sep : str or unicode
        Right-most separator to split each element in array.

    Returns
    -------
    out : ndarray
        Output array of string or unicode, depending on input
        type. The output array will have an extra dimension with
        3 elements per input element.

    See Also
    --------
    str.rpartition

    """
    # The per-element 3-tuples are collected in an object array first and
    # then converted by `_to_bytes_or_str_array`, which is also given `a`.
    return _to_bytes_or_str_array(
        _vec_string(a, object_, 'rpartition', (sep,)), a)
|
||
|
|
||
|
|
||
|
def _split_dispatcher(a, sep=None, maxsplit=None):
    # Dispatcher handed to `array_function_dispatch` by `split` and
    # `rsplit`; only `a` is reported as a relevant argument.
    return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_split_dispatcher)
def rsplit(a, sep=None, maxsplit=None):
    """
    For each element in `a`, return a list of the words in the
    string, using `sep` as the delimiter string.

    Calls `str.rsplit` element-wise.

    Except for splitting from the right, `rsplit`
    behaves like `split`.

    Parameters
    ----------
    a : array_like of str or unicode

    sep : str or unicode, optional
        If `sep` is not specified or None, any whitespace string
        is a separator.
    maxsplit : int, optional
        If `maxsplit` is given, at most `maxsplit` splits are done,
        the rightmost ones.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.rsplit, split

    """
    # This will return an array of lists of different sizes, so we
    # leave it as an object array
    return _vec_string(
        a, object_, 'rsplit', [sep] + _clean_args(maxsplit))
|
||
|
|
||
|
|
||
|
def _strip_dispatcher(a, chars=None):
    # NOTE: this repeats the identical `_strip_dispatcher` defined ahead of
    # `lstrip`; the redefinition is redundant but harmless.
    # Only `a` is reported as a relevant argument.
    return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_strip_dispatcher)
def rstrip(a, chars=None):
    """
    For each element in `a`, return a copy with the trailing
    characters removed.

    Calls `str.rstrip` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    chars : str or unicode, optional
        The `chars` argument is a string specifying the set of
        characters to be removed. If omitted or None, the `chars`
        argument defaults to removing whitespace. The `chars` argument
        is not a suffix; rather, all combinations of its values are
        stripped.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.rstrip

    Examples
    --------
    >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c
    array([b'aAaAaA', b'abBABba'], dtype='|S7')
    >>> np.char.rstrip(c, b'a')
    array([b'aAaAaA', b'abBABb'], dtype='|S7')
    >>> np.char.rstrip(c, b'A')
    array([b'aAaAa', b'abBABba'], dtype='|S7')

    """
    a_arr = numpy.asarray(a)
    # `chars` is forwarded as-is, including None.
    return _vec_string(a_arr, a_arr.dtype, 'rstrip', (chars,))
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_split_dispatcher)
def split(a, sep=None, maxsplit=None):
    """
    For each element in `a`, return a list of the words in the
    string, using `sep` as the delimiter string.

    Calls `str.split` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sep : str or unicode, optional
       If `sep` is not specified or None, any whitespace string is a
       separator.

    maxsplit : int, optional
        If `maxsplit` is given, at most `maxsplit` splits are done.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.split, rsplit

    """
    # This will return an array of lists of different sizes, so we
    # leave it as an object array
    return _vec_string(
        a, object_, 'split', [sep] + _clean_args(maxsplit))
|
||
|
|
||
|
|
||
|
def _splitlines_dispatcher(a, keepends=None):
    # Dispatcher handed to `array_function_dispatch` by `splitlines`;
    # only `a` is reported as a relevant argument.
    return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_splitlines_dispatcher)
def splitlines(a, keepends=None):
    """
    For each element in `a`, return a list of the lines in the
    element, breaking at line boundaries.

    Calls `str.splitlines` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    keepends : bool, optional
        Line breaks are not included in the resulting list unless
        keepends is given and true.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.splitlines

    """
    return _vec_string(
        a, object_, 'splitlines', _clean_args(keepends))
|
||
|
|
||
|
|
||
|
def _startswith_dispatcher(a, prefix, start=None, end=None):
    # Dispatcher handed to `array_function_dispatch` by `startswith`;
    # only `a` is reported as a relevant argument.
    return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_startswith_dispatcher)
def startswith(a, prefix, start=0, end=None):
    """
    Returns a boolean array which is `True` where the string element
    in `a` starts with `prefix`, otherwise `False`.

    Calls `str.startswith` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    prefix : str

    start, end : int, optional
        With optional `start`, test beginning at that position. With
        optional `end`, stop comparing at that position.

    Returns
    -------
    out : ndarray
        Array of booleans

    See Also
    --------
    str.startswith

    """
    return _vec_string(
        a, bool_, 'startswith', [prefix, start] + _clean_args(end))
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_strip_dispatcher)
def strip(a, chars=None):
    """
    For each element in `a`, return a copy with the leading and
    trailing characters removed.

    Calls `str.strip` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    chars : str or unicode, optional
       The `chars` argument is a string specifying the set of
       characters to be removed. If omitted or None, the `chars`
       argument defaults to removing whitespace. The `chars` argument
       is not a prefix or suffix; rather, all combinations of its
       values are stripped.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.strip

    Examples
    --------
    >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
    >>> c
    array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
    >>> np.char.strip(c)
    array(['aAaAaA', 'aA', 'abBABba'], dtype='<U7')
    >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
    array(['AaAaA', ' aA ', 'bBABb'], dtype='<U7')
    >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
    array(['aAaAa', ' aA ', 'abBABba'], dtype='<U7')

    """
    a_arr = numpy.asarray(a)
    # Unlike `lstrip`/`rstrip`, `chars` goes through `_clean_args` here.
    return _vec_string(a_arr, a_arr.dtype, 'strip', _clean_args(chars))
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_unary_op_dispatcher)
def swapcase(a):
    """
    Return element-wise a copy of the string with
    uppercase characters converted to lowercase and vice versa.

    Calls `str.swapcase` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.swapcase

    Examples
    --------
    >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
    array([b'a1B c', b'1b Ca', b'b Ca1', b'cA1b'], dtype='|S5')
    >>> np.char.swapcase(c)
    array([b'A1b C', b'1B cA', b'B cA1', b'Ca1B'], dtype='|S5')

    """
    a_arr = numpy.asarray(a)
    # The result reuses the input dtype.
    return _vec_string(a_arr, a_arr.dtype, 'swapcase')
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_unary_op_dispatcher)
def title(a):
    """
    Return element-wise title cased version of string or unicode.

    Title case words start with uppercase characters, all remaining cased
    characters are lowercase.

    Calls `str.title` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.title

    Examples
    --------
    >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
    array([b'a1b c', b'1b ca', b'b ca1', b'ca1b'], dtype='|S5')
    >>> np.char.title(c)
    array([b'A1B C', b'1B Ca', b'B Ca1', b'Ca1B'], dtype='|S5')

    """
    a_arr = numpy.asarray(a)
    # The result reuses the input dtype.
    return _vec_string(a_arr, a_arr.dtype, 'title')
|
||
|
|
||
|
|
||
|
def _translate_dispatcher(a, table, deletechars=None):
    # Dispatcher handed to `array_function_dispatch` by `translate`;
    # only `a` is reported as a relevant argument.
    return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_translate_dispatcher)
def translate(a, table, deletechars=None):
    """
    For each element in `a`, return a copy of the string where all
    characters occurring in the optional argument `deletechars` are
    removed, and the remaining characters have been mapped through the
    given translation table.

    Calls `str.translate` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    table : str of length 256

    deletechars : str
        Only forwarded when `a` is not of a `str_` dtype; for `str_`
        input this argument is ignored.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.translate

    """
    a_arr = numpy.asarray(a)
    if issubclass(a_arr.dtype.type, str_):
        # str.translate takes only the table, so `deletechars` is dropped.
        return _vec_string(
            a_arr, a_arr.dtype, 'translate', (table,))
    else:
        return _vec_string(
            a_arr, a_arr.dtype, 'translate', [table] + _clean_args(deletechars))
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_unary_op_dispatcher)
def upper(a):
    """
    Return an array with the elements converted to uppercase.

    Calls `str.upper` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.upper

    Examples
    --------
    >>> c = np.array(['a1b c', '1bca', 'bca1']); c
    array(['a1b c', '1bca', 'bca1'], dtype='<U5')
    >>> np.char.upper(c)
    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')

    """
    a_arr = numpy.asarray(a)
    # The result reuses the input dtype.
    return _vec_string(a_arr, a_arr.dtype, 'upper')
|
||
|
|
||
|
|
||
|
def _zfill_dispatcher(a, width):
    # Dispatcher handed to `array_function_dispatch` by `zfill`;
    # only `a` is reported as a relevant argument.
    return (a,)
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_zfill_dispatcher)
def zfill(a, width):
    """
    Return the numeric string left-filled with zeros

    Calls `str.zfill` element-wise.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.
    width : int
        Width of string to left-fill elements in `a`.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.zfill

    """
    a_arr = numpy.asarray(a)
    width_arr = numpy.asarray(width)
    # The output item size is taken from the largest requested width.
    size = int(numpy.max(width_arr.flat))
    return _vec_string(
        a_arr, type(a_arr.dtype)(size), 'zfill', (width_arr,))
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_unary_op_dispatcher)
def isnumeric(a):
    """
    For each element, return True if there are only numeric
    characters in the element.

    Calls `str.isnumeric` element-wise.

    Numeric characters include digit characters, and all characters
    that have the Unicode numeric value property, e.g. ``U+2155,
    VULGAR FRACTION ONE FIFTH``.

    Parameters
    ----------
    a : array_like, unicode
        Input array.

    Returns
    -------
    out : ndarray, bool
        Array of booleans of same shape as `a`.

    Raises
    ------
    TypeError
        If `a` is rejected by the `_is_unicode` check.

    See Also
    --------
    str.isnumeric

    Examples
    --------
    >>> np.char.isnumeric(['123', '123abc', '9.0', '1/4', 'VIII'])
    array([ True, False, False, False, False])

    """
    if not _is_unicode(a):
        raise TypeError(
            "isnumeric is only available for Unicode strings and arrays")
    return _vec_string(a, bool_, 'isnumeric')
|
||
|
|
||
|
|
||
|
@array_function_dispatch(_unary_op_dispatcher)
def isdecimal(a):
    """
    For each element, return True if there are only decimal
    characters in the element.

    Calls `str.isdecimal` element-wise.

    Decimal characters include digit characters, and all characters
    that can be used to form decimal-radix numbers,
    e.g. ``U+0660, ARABIC-INDIC DIGIT ZERO``.

    Parameters
    ----------
    a : array_like, unicode
        Input array.

    Returns
    -------
    out : ndarray, bool
        Array of booleans identical in shape to `a`.

    Raises
    ------
    TypeError
        If `a` is rejected by the `_is_unicode` check.

    See Also
    --------
    str.isdecimal

    Examples
    --------
    >>> np.char.isdecimal(['12345', '4.99', '123ABC', ''])
    array([ True, False, False, False])

    """
    if not _is_unicode(a):
        raise TypeError(
            "isdecimal is only available for Unicode strings and arrays")
    return _vec_string(a, bool_, 'isdecimal')
|
||
|
|
||
|
|
||
|
@set_module('numpy')
|
||
|
class chararray(ndarray):
|
||
|
"""
|
||
|
chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
|
||
|
strides=None, order=None)
|
||
|
|
||
|
Provides a convenient view on arrays of string and unicode values.
|
||
|
|
||
|
.. note::
|
||
|
The `chararray` class exists for backwards compatibility with
|
||
|
Numarray, it is not recommended for new development. Starting from numpy
|
||
|
1.4, if one needs arrays of strings, it is recommended to use arrays of
|
||
|
`dtype` `object_`, `bytes_` or `str_`, and use the free functions
|
||
|
in the `numpy.char` module for fast vectorized string operations.
|
||
|
|
||
|
Versus a regular NumPy array of type `str` or `unicode`, this
|
||
|
class adds the following functionality:
|
||
|
|
||
|
1) values automatically have whitespace removed from the end
|
||
|
when indexed
|
||
|
|
||
|
2) comparison operators automatically remove whitespace from the
|
||
|
end when comparing values
|
||
|
|
||
|
3) vectorized string operations are provided as methods
|
||
|
(e.g. `.endswith`) and infix operators (e.g. ``"+", "*", "%"``)
|
||
|
|
||
|
chararrays should be created using `numpy.char.array` or
|
||
|
`numpy.char.asarray`, rather than this constructor directly.
|
||
|
|
||
|
This constructor creates the array, using `buffer` (with `offset`
|
||
|
and `strides`) if it is not ``None``. If `buffer` is ``None``, then
|
||
|
constructs a new array with `strides` in "C order", unless both
|
||
|
``len(shape) >= 2`` and ``order='F'``, in which case `strides`
|
||
|
is in "Fortran order".
|
||
|
|
||
|
Methods
|
||
|
-------
|
||
|
astype
|
||
|
argsort
|
||
|
copy
|
||
|
count
|
||
|
decode
|
||
|
dump
|
||
|
dumps
|
||
|
encode
|
||
|
endswith
|
||
|
expandtabs
|
||
|
fill
|
||
|
find
|
||
|
flatten
|
||
|
getfield
|
||
|
index
|
||
|
isalnum
|
||
|
isalpha
|
||
|
isdecimal
|
||
|
isdigit
|
||
|
islower
|
||
|
isnumeric
|
||
|
isspace
|
||
|
istitle
|
||
|
isupper
|
||
|
item
|
||
|
join
|
||
|
ljust
|
||
|
lower
|
||
|
lstrip
|
||
|
nonzero
|
||
|
put
|
||
|
ravel
|
||
|
repeat
|
||
|
replace
|
||
|
reshape
|
||
|
resize
|
||
|
rfind
|
||
|
rindex
|
||
|
rjust
|
||
|
rsplit
|
||
|
rstrip
|
||
|
searchsorted
|
||
|
setfield
|
||
|
setflags
|
||
|
sort
|
||
|
split
|
||
|
splitlines
|
||
|
squeeze
|
||
|
startswith
|
||
|
strip
|
||
|
swapaxes
|
||
|
swapcase
|
||
|
take
|
||
|
title
|
||
|
tofile
|
||
|
tolist
|
||
|
tostring
|
||
|
translate
|
||
|
transpose
|
||
|
upper
|
||
|
view
|
||
|
zfill
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
shape : tuple
|
||
|
Shape of the array.
|
||
|
itemsize : int, optional
|
||
|
Length of each array element, in number of characters. Default is 1.
|
||
|
unicode : bool, optional
|
||
|
Are the array elements of type unicode (True) or string (False).
|
||
|
Default is False.
|
||
|
buffer : object exposing the buffer interface or str, optional
|
||
|
Memory address of the start of the array data. Default is None,
|
||
|
in which case a new array is created.
|
||
|
offset : int, optional
|
||
|
Fixed stride displacement from the beginning of an axis?
|
||
|
Default is 0. Needs to be >=0.
|
||
|
strides : array_like of ints, optional
|
||
|
Strides for the array (see `ndarray.strides` for full description).
|
||
|
Default is None.
|
||
|
order : {'C', 'F'}, optional
|
||
|
The order in which the array data is stored in memory: 'C' ->
|
||
|
"row major" order (the default), 'F' -> "column major"
|
||
|
(Fortran) order.
|
||
|
|
||
|
Examples
|
||
|
--------
|
||
|
>>> charar = np.chararray((3, 3))
|
||
|
>>> charar[:] = 'a'
|
||
|
>>> charar
|
||
|
chararray([[b'a', b'a', b'a'],
|
||
|
[b'a', b'a', b'a'],
|
||
|
[b'a', b'a', b'a']], dtype='|S1')
|
||
|
|
||
|
>>> charar = np.chararray(charar.shape, itemsize=5)
|
||
|
>>> charar[:] = 'abc'
|
||
|
>>> charar
|
||
|
chararray([[b'abc', b'abc', b'abc'],
|
||
|
[b'abc', b'abc', b'abc'],
|
||
|
[b'abc', b'abc', b'abc']], dtype='|S5')
|
||
|
|
||
|
"""
|
||
|
def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
|
||
|
offset=0, strides=None, order='C'):
|
||
|
global _globalvar
|
||
|
|
||
|
if unicode:
|
||
|
dtype = str_
|
||
|
else:
|
||
|
dtype = bytes_
|
||
|
|
||
|
# force itemsize to be a Python int, since using NumPy integer
|
||
|
# types results in itemsize.itemsize being used as the size of
|
||
|
# strings in the new array.
|
||
|
itemsize = int(itemsize)
|
||
|
|
||
|
if isinstance(buffer, str):
|
||
|
# unicode objects do not have the buffer interface
|
||
|
filler = buffer
|
||
|
buffer = None
|
||
|
else:
|
||
|
filler = None
|
||
|
|
||
|
_globalvar = 1
|
||
|
if buffer is None:
|
||
|
self = ndarray.__new__(subtype, shape, (dtype, itemsize),
|
||
|
order=order)
|
||
|
else:
|
||
|
self = ndarray.__new__(subtype, shape, (dtype, itemsize),
|
||
|
buffer=buffer,
|
||
|
offset=offset, strides=strides,
|
||
|
order=order)
|
||
|
if filler is not None:
|
||
|
self[...] = filler
|
||
|
_globalvar = 0
|
||
|
return self
|
||
|
|
||
|
def __array_finalize__(self, obj):
|
||
|
# The b is a special case because it is used for reconstructing.
|
||
|
if not _globalvar and self.dtype.char not in 'SUbc':
|
||
|
raise ValueError("Can only create a chararray from string data.")
|
||
|
|
||
|
def __getitem__(self, obj):
|
||
|
val = ndarray.__getitem__(self, obj)
|
||
|
|
||
|
if isinstance(val, character):
|
||
|
temp = val.rstrip()
|
||
|
if len(temp) == 0:
|
||
|
val = ''
|
||
|
else:
|
||
|
val = temp
|
||
|
|
||
|
return val
|
||
|
|
||
|
# IMPLEMENTATION NOTE: Most of the methods of this class are
|
||
|
# direct delegations to the free functions in this module.
|
||
|
# However, those that return an array of strings should instead
|
||
|
# return a chararray, so some extra wrapping is required.
|
||
|
|
||
|
def __eq__(self, other):
|
||
|
"""
|
||
|
Return (self == other) element-wise.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
equal
|
||
|
"""
|
||
|
return equal(self, other)
|
||
|
|
||
|
def __ne__(self, other):
|
||
|
"""
|
||
|
Return (self != other) element-wise.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
not_equal
|
||
|
"""
|
||
|
return not_equal(self, other)
|
||
|
|
||
|
def __ge__(self, other):
|
||
|
"""
|
||
|
Return (self >= other) element-wise.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
greater_equal
|
||
|
"""
|
||
|
return greater_equal(self, other)
|
||
|
|
||
|
def __le__(self, other):
|
||
|
"""
|
||
|
Return (self <= other) element-wise.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
less_equal
|
||
|
"""
|
||
|
return less_equal(self, other)
|
||
|
|
||
|
def __gt__(self, other):
|
||
|
"""
|
||
|
Return (self > other) element-wise.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
greater
|
||
|
"""
|
||
|
return greater(self, other)
|
||
|
|
||
|
def __lt__(self, other):
|
||
|
"""
|
||
|
Return (self < other) element-wise.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
less
|
||
|
"""
|
||
|
return less(self, other)
|
||
|
|
||
|
def __add__(self, other):
|
||
|
"""
|
||
|
Return (self + other), that is string concatenation,
|
||
|
element-wise for a pair of array_likes of str or unicode.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
add
|
||
|
"""
|
||
|
return asarray(add(self, other))
|
||
|
|
||
|
def __radd__(self, other):
|
||
|
"""
|
||
|
Return (other + self), that is string concatenation,
|
||
|
element-wise for a pair of array_likes of `bytes_` or `str_`.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
add
|
||
|
"""
|
||
|
return asarray(add(numpy.asarray(other), self))
|
||
|
|
||
|
def __mul__(self, i):
|
||
|
"""
|
||
|
Return (self * i), that is string multiple concatenation,
|
||
|
element-wise.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
multiply
|
||
|
"""
|
||
|
return asarray(multiply(self, i))
|
||
|
|
||
|
def __rmul__(self, i):
|
||
|
"""
|
||
|
Return (self * i), that is string multiple concatenation,
|
||
|
element-wise.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
multiply
|
||
|
"""
|
||
|
return asarray(multiply(self, i))
|
||
|
|
||
|
def __mod__(self, i):
|
||
|
"""
|
||
|
Return (self % i), that is pre-Python 2.6 string formatting
|
||
|
(interpolation), element-wise for a pair of array_likes of `bytes_`
|
||
|
or `str_`.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
mod
|
||
|
"""
|
||
|
return asarray(mod(self, i))
|
||
|
|
||
|
def __rmod__(self, other):
|
||
|
return NotImplemented
|
||
|
|
||
|
def argsort(self, axis=-1, kind=None, order=None):
|
||
|
"""
|
||
|
Return the indices that sort the array lexicographically.
|
||
|
|
||
|
For full documentation see `numpy.argsort`, for which this method is
|
||
|
in fact merely a "thin wrapper."
|
||
|
|
||
|
Examples
|
||
|
--------
|
||
|
>>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5')
|
||
|
>>> c = c.view(np.chararray); c
|
||
|
chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'],
|
||
|
dtype='|S5')
|
||
|
>>> c[c.argsort()]
|
||
|
chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'],
|
||
|
dtype='|S5')
|
||
|
|
||
|
"""
|
||
|
return self.__array__().argsort(axis, kind, order)
|
||
|
argsort.__doc__ = ndarray.argsort.__doc__
|
||
|
|
||
|
def capitalize(self):
|
||
|
"""
|
||
|
Return a copy of `self` with only the first character of each element
|
||
|
capitalized.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
char.capitalize
|
||
|
|
||
|
"""
|
||
|
return asarray(capitalize(self))
|
||
|
|
||
|
def center(self, width, fillchar=' '):
|
||
|
"""
|
||
|
Return a copy of `self` with its elements centered in a
|
||
|
string of length `width`.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
center
|
||
|
"""
|
||
|
return asarray(center(self, width, fillchar))
|
||
|
|
||
|
def count(self, sub, start=0, end=None):
|
||
|
"""
|
||
|
Returns an array with the number of non-overlapping occurrences of
|
||
|
substring `sub` in the range [`start`, `end`].
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
char.count
|
||
|
|
||
|
"""
|
||
|
return count(self, sub, start, end)
|
||
|
|
||
|
def decode(self, encoding=None, errors=None):
    """
    Call ``bytes.decode`` element-wise.

    `encoding` and `errors` are forwarded positionally to the
    module-level `decode` function, whose result is returned unchanged.

    See Also
    --------
    char.decode

    """
    decoded = decode(self, encoding, errors)
    return decoded
|
||
|
|
||
|
def encode(self, encoding=None, errors=None):
    """
    Call `str.encode` element-wise.

    `encoding` and `errors` are forwarded positionally to the
    module-level `encode` function, whose result is returned unchanged.

    See Also
    --------
    char.encode

    """
    encoded = encode(self, encoding, errors)
    return encoded
|
||
|
|
||
|
def endswith(self, suffix, start=0, end=None):
    """
    Return a boolean array which is `True` where the string element in
    `self` ends with `suffix`, otherwise `False`.

    All arguments are forwarded positionally to the module-level
    `endswith` function, whose result is returned unchanged.

    See Also
    --------
    char.endswith

    """
    matches = endswith(self, suffix, start, end)
    return matches
|
||
|
|
||
|
def expandtabs(self, tabsize=8):
    """
    Return a copy of each string element where all tab characters are
    replaced by one or more spaces.

    `tabsize` is forwarded to the module-level `expandtabs` function;
    the result is passed through `asarray`.

    See Also
    --------
    char.expandtabs

    """
    expanded = expandtabs(self, tabsize)
    return asarray(expanded)
|
||
|
|
||
|
def find(self, sub, start=0, end=None):
    """
    For each element, return the lowest index in the string where
    substring `sub` is found.

    All arguments are forwarded positionally to the module-level `find`
    function, whose result is returned unchanged.

    See Also
    --------
    char.find

    """
    positions = find(self, sub, start, end)
    return positions
|
||
|
|
||
|
def index(self, sub, start=0, end=None):
    """
    Like `find`, but raises `ValueError` when the substring is not
    found.

    All arguments are forwarded positionally to the module-level
    `index` function, whose result is returned unchanged.

    See Also
    --------
    char.index

    """
    positions = index(self, sub, start, end)
    return positions
|
||
|
|
||
|
def isalnum(self):
    """
    Return true for each element if all characters in the string are
    alphanumeric and there is at least one character, false otherwise.

    Delegates to the module-level `isalnum` function and returns its
    result unchanged.

    See Also
    --------
    char.isalnum

    """
    flags = isalnum(self)
    return flags
|
||
|
|
||
|
def isalpha(self):
    """
    Return true for each element if all characters in the string are
    alphabetic and there is at least one character, false otherwise.

    Delegates to the module-level `isalpha` function and returns its
    result unchanged.

    See Also
    --------
    char.isalpha

    """
    flags = isalpha(self)
    return flags
|
||
|
|
||
|
def isdigit(self):
    """
    Return true for each element if all characters in the string are
    digits and there is at least one character, false otherwise.

    Delegates to the module-level `isdigit` function and returns its
    result unchanged.

    See Also
    --------
    char.isdigit

    """
    flags = isdigit(self)
    return flags
|
||
|
|
||
|
def islower(self):
    """
    Return true for each element if all cased characters in the string
    are lowercase and there is at least one cased character, false
    otherwise.

    Delegates to the module-level `islower` function and returns its
    result unchanged.

    See Also
    --------
    char.islower

    """
    flags = islower(self)
    return flags
|
||
|
|
||
|
def isspace(self):
    """
    Return true for each element if there are only whitespace
    characters in the string and there is at least one character,
    false otherwise.

    Delegates to the module-level `isspace` function and returns its
    result unchanged.

    See Also
    --------
    char.isspace

    """
    flags = isspace(self)
    return flags
|
||
|
|
||
|
def istitle(self):
    """
    Return true for each element if the element is a titlecased string
    and there is at least one character, false otherwise.

    Delegates to the module-level `istitle` function and returns its
    result unchanged.

    See Also
    --------
    char.istitle

    """
    flags = istitle(self)
    return flags
|
||
|
|
||
|
def isupper(self):
    """
    Return true for each element if all cased characters in the string
    are uppercase and there is at least one character, false otherwise.

    Delegates to the module-level `isupper` function and returns its
    result unchanged.

    See Also
    --------
    char.isupper

    """
    flags = isupper(self)
    return flags
|
||
|
|
||
|
def join(self, seq):
    """
    Return a string which is the concatenation of the strings in the
    sequence `seq`.

    `self` and `seq` are forwarded, in that order, to the module-level
    `join` function, whose result is returned unchanged.

    See Also
    --------
    char.join

    """
    joined = join(self, seq)
    return joined
|
||
|
|
||
|
def ljust(self, width, fillchar=' '):
    """
    Return an array with the elements of `self` left-justified in a
    string of length `width`.

    `width` and `fillchar` are forwarded positionally to the
    module-level `ljust` function; the result is passed through
    `asarray`.

    See Also
    --------
    char.ljust

    """
    justified = ljust(self, width, fillchar)
    return asarray(justified)
|
||
|
|
||
|
def lower(self):
    """
    Return an array with the elements of `self` converted to lowercase.

    Delegates to the module-level `lower` function and passes the
    result through `asarray`.

    See Also
    --------
    char.lower

    """
    lowered = lower(self)
    return asarray(lowered)
|
||
|
|
||
|
def lstrip(self, chars=None):
    """
    For each element in `self`, return a copy with the leading
    characters removed.

    `chars` is forwarded to the module-level `lstrip` function; the
    result is passed through `asarray`.

    See Also
    --------
    char.lstrip

    """
    stripped = lstrip(self, chars)
    return asarray(stripped)
|
||
|
|
||
|
def partition(self, sep):
    """
    Partition each element in `self` around `sep`.

    `sep` is forwarded to the module-level `partition` function; the
    result is passed through `asarray`.

    See Also
    --------
    char.partition

    """
    pieces = partition(self, sep)
    return asarray(pieces)
|
||
|
|
||
|
def replace(self, old, new, count=None):
    """
    For each element in `self`, return a copy of the string with all
    occurrences of substring `old` replaced by `new`.

    `old`, `new` and `count` are forwarded positionally to the
    module-level `replace` function; the result is passed through
    `asarray`.

    See Also
    --------
    char.replace

    """
    replaced = replace(self, old, new, count)
    return asarray(replaced)
|
||
|
|
||
|
def rfind(self, sub, start=0, end=None):
    """
    For each element in `self`, return the highest index in the string
    where substring `sub` is found, such that `sub` is contained within
    [`start`, `end`].

    All arguments are forwarded positionally to the module-level
    `rfind` function, whose result is returned unchanged.

    See Also
    --------
    char.rfind

    """
    positions = rfind(self, sub, start, end)
    return positions
|
||
|
|
||
|
def rindex(self, sub, start=0, end=None):
    """
    Like `rfind`, but raises `ValueError` when the substring `sub` is
    not found.

    All arguments are forwarded positionally to the module-level
    `rindex` function, whose result is returned unchanged.

    See Also
    --------
    char.rindex

    """
    positions = rindex(self, sub, start, end)
    return positions
|
||
|
|
||
|
def rjust(self, width, fillchar=' '):
    """
    Return an array with the elements of `self` right-justified in a
    string of length `width`.

    `width` and `fillchar` are forwarded positionally to the
    module-level `rjust` function; the result is passed through
    `asarray`.

    See Also
    --------
    char.rjust

    """
    justified = rjust(self, width, fillchar)
    return asarray(justified)
|
||
|
|
||
|
def rpartition(self, sep):
    """
    Partition each element in `self` around `sep`.

    `sep` is forwarded to the module-level `rpartition` function; the
    result is passed through `asarray`.

    See Also
    --------
    char.rpartition

    """
    pieces = rpartition(self, sep)
    return asarray(pieces)
|
||
|
|
||
|
def rsplit(self, sep=None, maxsplit=None):
    """
    For each element in `self`, return a list of the words in the
    string, using `sep` as the delimiter string.

    `sep` and `maxsplit` are forwarded positionally to the module-level
    `rsplit` function, whose result is returned unchanged.

    See Also
    --------
    char.rsplit

    """
    words = rsplit(self, sep, maxsplit)
    return words
|
||
|
|
||
|
def rstrip(self, chars=None):
    """
    For each element in `self`, return a copy with the trailing
    characters removed.

    `chars` is forwarded to the module-level `rstrip` function; the
    result is passed through `asarray`.

    See Also
    --------
    char.rstrip

    """
    stripped = rstrip(self, chars)
    return asarray(stripped)
|
||
|
|
||
|
def split(self, sep=None, maxsplit=None):
    """
    For each element in `self`, return a list of the words in the
    string, using `sep` as the delimiter string.

    `sep` and `maxsplit` are forwarded positionally to the module-level
    `split` function, whose result is returned unchanged.

    See Also
    --------
    char.split

    """
    words = split(self, sep, maxsplit)
    return words
|
||
|
|
||
|
def splitlines(self, keepends=None):
    """
    For each element in `self`, return a list of the lines in the
    element, breaking at line boundaries.

    `keepends` is forwarded to the module-level `splitlines` function,
    whose result is returned unchanged.

    See Also
    --------
    char.splitlines

    """
    lines = splitlines(self, keepends)
    return lines
|
||
|
|
||
|
def startswith(self, prefix, start=0, end=None):
    """
    Return a boolean array which is `True` where the string element in
    `self` starts with `prefix`, otherwise `False`.

    All arguments are forwarded positionally to the module-level
    `startswith` function, whose result is returned unchanged.

    See Also
    --------
    char.startswith

    """
    matches = startswith(self, prefix, start, end)
    return matches
|
||
|
|
||
|
def strip(self, chars=None):
    """
    For each element in `self`, return a copy with the leading and
    trailing characters removed.

    `chars` is forwarded to the module-level `strip` function; the
    result is passed through `asarray`.

    See Also
    --------
    char.strip

    """
    stripped = strip(self, chars)
    return asarray(stripped)
|
||
|
|
||
|
def swapcase(self):
    """
    For each element in `self`, return a copy of the string with
    uppercase characters converted to lowercase and vice versa.

    Delegates to the module-level `swapcase` function and passes the
    result through `asarray`.

    See Also
    --------
    char.swapcase

    """
    swapped = swapcase(self)
    return asarray(swapped)
|
||
|
|
||
|
def title(self):
    """
    For each element in `self`, return a titlecased version of the
    string: words start with uppercase characters, all remaining cased
    characters are lowercase.

    Delegates to the module-level `title` function and passes the
    result through `asarray`.

    See Also
    --------
    char.title

    """
    titled = title(self)
    return asarray(titled)
|
||
|
|
||
|
def translate(self, table, deletechars=None):
    """
    For each element in `self`, return a copy of the string where all
    characters occurring in the optional argument `deletechars` are
    removed, and the remaining characters have been mapped through the
    given translation table.

    `table` and `deletechars` are forwarded positionally to the
    module-level `translate` function; the result is passed through
    `asarray`.

    See Also
    --------
    char.translate

    """
    translated = translate(self, table, deletechars)
    return asarray(translated)
|
||
|
|
||
|
def upper(self):
    """
    Return an array with the elements of `self` converted to uppercase.

    Delegates to the module-level `upper` function and passes the
    result through `asarray`.

    See Also
    --------
    char.upper

    """
    uppered = upper(self)
    return asarray(uppered)
|
||
|
|
||
|
def zfill(self, width):
    """
    Return the numeric string left-filled with zeros in a string of
    length `width`.

    `width` is forwarded to the module-level `zfill` function; the
    result is passed through `asarray`.

    See Also
    --------
    char.zfill

    """
    padded = zfill(self, width)
    return asarray(padded)
|
||
|
|
||
|
def isnumeric(self):
    """
    For each element in `self`, return True if there are only numeric
    characters in the element.

    Delegates to the module-level `isnumeric` function and returns its
    result unchanged.

    See Also
    --------
    char.isnumeric

    """
    flags = isnumeric(self)
    return flags
|
||
|
|
||
|
def isdecimal(self):
    """
    For each element in `self`, return True if there are only decimal
    characters in the element.

    Delegates to the module-level `isdecimal` function and returns its
    result unchanged.

    See Also
    --------
    char.isdecimal

    """
    flags = isdecimal(self)
    return flags
|
||
|
|
||
|
|
||
|
@set_module("numpy.char")
|
||
|
def array(obj, itemsize=None, copy=True, unicode=None, order=None):
|
||
|
"""
|
||
|
Create a `chararray`.
|
||
|
|
||
|
.. note::
|
||
|
This class is provided for numarray backward-compatibility.
|
||
|
New code (not concerned with numarray compatibility) should use
|
||
|
arrays of type `bytes_` or `str_` and use the free functions
|
||
|
in :mod:`numpy.char <numpy.core.defchararray>` for fast
|
||
|
vectorized string operations instead.
|
||
|
|
||
|
Versus a regular NumPy array of type `str` or `unicode`, this
|
||
|
class adds the following functionality:
|
||
|
|
||
|
1) values automatically have whitespace removed from the end
|
||
|
when indexed
|
||
|
|
||
|
2) comparison operators automatically remove whitespace from the
|
||
|
end when comparing values
|
||
|
|
||
|
3) vectorized string operations are provided as methods
|
||
|
(e.g. `str.endswith`) and infix operators (e.g. ``+, *, %``)
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
obj : array of str or unicode-like
|
||
|
|
||
|
itemsize : int, optional
|
||
|
`itemsize` is the number of characters per scalar in the
|
||
|
resulting array. If `itemsize` is None, and `obj` is an
|
||
|
object array or a Python list, the `itemsize` will be
|
||
|
automatically determined. If `itemsize` is provided and `obj`
|
||
|
is of type str or unicode, then the `obj` string will be
|
||
|
chunked into `itemsize` pieces.
|
||
|
|
||
|
copy : bool, optional
|
||
|
If true (default), then the object is copied. Otherwise, a copy
|
||
|
will only be made if __array__ returns a copy, if obj is a
|
||
|
nested sequence, or if a copy is needed to satisfy any of the other
|
||
|
requirements (`itemsize`, unicode, `order`, etc.).
|
||
|
|
||
|
unicode : bool, optional
|
||
|
When true, the resulting `chararray` can contain Unicode
|
||
|
characters, when false only 8-bit characters. If unicode is
|
||
|
None and `obj` is one of the following:
|
||
|
|
||
|
- a `chararray`,
|
||
|
- an ndarray of type `str` or `unicode`
|
||
|
- a Python str or unicode object,
|
||
|
|
||
|
then the unicode setting of the output array will be
|
||
|
automatically determined.
|
||
|
|
||
|
order : {'C', 'F', 'A'}, optional
|
||
|
Specify the order of the array. If order is 'C' (default), then the
|
||
|
array will be in C-contiguous order (last-index varies the
|
||
|
fastest). If order is 'F', then the returned array
|
||
|
will be in Fortran-contiguous order (first-index varies the
|
||
|
fastest). If order is 'A', then the returned array may
|
||
|
be in any order (either C-, Fortran-contiguous, or even
|
||
|
discontiguous).
|
||
|
"""
|
||
|
if isinstance(obj, (bytes, str)):
|
||
|
if unicode is None:
|
||
|
if isinstance(obj, str):
|
||
|
unicode = True
|
||
|
else:
|
||
|
unicode = False
|
||
|
|
||
|
if itemsize is None:
|
||
|
itemsize = len(obj)
|
||
|
shape = len(obj) // itemsize
|
||
|
|
||
|
return chararray(shape, itemsize=itemsize, unicode=unicode,
|
||
|
buffer=obj, order=order)
|
||
|
|
||
|
if isinstance(obj, (list, tuple)):
|
||
|
obj = numpy.asarray(obj)
|
||
|
|
||
|
if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):
|
||
|
# If we just have a vanilla chararray, create a chararray
|
||
|
# view around it.
|
||
|
if not isinstance(obj, chararray):
|
||
|
obj = obj.view(chararray)
|
||
|
|
||
|
if itemsize is None:
|
||
|
itemsize = obj.itemsize
|
||
|
# itemsize is in 8-bit chars, so for Unicode, we need
|
||
|
# to divide by the size of a single Unicode character,
|
||
|
# which for NumPy is always 4
|
||
|
if issubclass(obj.dtype.type, str_):
|
||
|
itemsize //= 4
|
||
|
|
||
|
if unicode is None:
|
||
|
if issubclass(obj.dtype.type, str_):
|
||
|
unicode = True
|
||
|
else:
|
||
|
unicode = False
|
||
|
|
||
|
if unicode:
|
||
|
dtype = str_
|
||
|
else:
|
||
|
dtype = bytes_
|
||
|
|
||
|
if order is not None:
|
||
|
obj = numpy.asarray(obj, order=order)
|
||
|
if (copy or
|
||
|
(itemsize != obj.itemsize) or
|
||
|
(not unicode and isinstance(obj, str_)) or
|
||
|
(unicode and isinstance(obj, bytes_))):
|
||
|
obj = obj.astype((dtype, int(itemsize)))
|
||
|
return obj
|
||
|
|
||
|
if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):
|
||
|
if itemsize is None:
|
||
|
# Since no itemsize was specified, convert the input array to
|
||
|
# a list so the ndarray constructor will automatically
|
||
|
# determine the itemsize for us.
|
||
|
obj = obj.tolist()
|
||
|
# Fall through to the default case
|
||
|
|
||
|
if unicode:
|
||
|
dtype = str_
|
||
|
else:
|
||
|
dtype = bytes_
|
||
|
|
||
|
if itemsize is None:
|
||
|
val = narray(obj, dtype=dtype, order=order, subok=True)
|
||
|
else:
|
||
|
val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)
|
||
|
return val.view(chararray)
|
||
|
|
||
|
|
||
|
@set_module("numpy.char")
|
||
|
def asarray(obj, itemsize=None, unicode=None, order=None):
|
||
|
"""
|
||
|
Convert the input to a `chararray`, copying the data only if
|
||
|
necessary.
|
||
|
|
||
|
Versus a regular NumPy array of type `str` or `unicode`, this
|
||
|
class adds the following functionality:
|
||
|
|
||
|
1) values automatically have whitespace removed from the end
|
||
|
when indexed
|
||
|
|
||
|
2) comparison operators automatically remove whitespace from the
|
||
|
end when comparing values
|
||
|
|
||
|
3) vectorized string operations are provided as methods
|
||
|
(e.g. `str.endswith`) and infix operators (e.g. ``+``, ``*``,``%``)
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
obj : array of str or unicode-like
|
||
|
|
||
|
itemsize : int, optional
|
||
|
`itemsize` is the number of characters per scalar in the
|
||
|
resulting array. If `itemsize` is None, and `obj` is an
|
||
|
object array or a Python list, the `itemsize` will be
|
||
|
automatically determined. If `itemsize` is provided and `obj`
|
||
|
is of type str or unicode, then the `obj` string will be
|
||
|
chunked into `itemsize` pieces.
|
||
|
|
||
|
unicode : bool, optional
|
||
|
When true, the resulting `chararray` can contain Unicode
|
||
|
characters, when false only 8-bit characters. If unicode is
|
||
|
None and `obj` is one of the following:
|
||
|
|
||
|
- a `chararray`,
|
||
|
- an ndarray of type `str` or 'unicode`
|
||
|
- a Python str or unicode object,
|
||
|
|
||
|
then the unicode setting of the output array will be
|
||
|
automatically determined.
|
||
|
|
||
|
order : {'C', 'F'}, optional
|
||
|
Specify the order of the array. If order is 'C' (default), then the
|
||
|
array will be in C-contiguous order (last-index varies the
|
||
|
fastest). If order is 'F', then the returned array
|
||
|
will be in Fortran-contiguous order (first-index varies the
|
||
|
fastest).
|
||
|
"""
|
||
|
return array(obj, itemsize, copy=False,
|
||
|
unicode=unicode, order=order)
|