
    Owg                    R   d Z ddlmZ ddlZddlZddlmZ ddlmZm	Z	m
Z
 ddlZddlZddlmZmZmZmZ ddlmZmZmZmZmZmZ ddlmZ dd	lmZ dd
lm Z m!Z! ddl"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1 ddl2m3Z3 ddl4m5Z5m6Z6m7Z7m8Z8 ddl9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z? ddl@mAZAmBZB ddlCmDZD ddlEmFZGmHZHmIZI ddlJmKZK erddlmLZLmMZMmNZN ddlOmPZPmQZQmRZR ddlSmTZTmUZU d6dZV	 	 	 	 	 	 	 	 d7dZWd8dZXej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  dZgd9dZhd:dZid Zjd;dZkd<d=dZlejZmdZnd>d Zo	 	 	 	 d?	 	 	 	 	 	 	 	 	 	 	 d@d!Zp e ed"       ed#       ed$      %      	 	 	 dA	 	 	 	 	 	 	 dBd&       Zq	 	 	 	 	 dC	 	 	 	 	 	 	 	 	 dDd'Zr	 	 	 	 	 dC	 	 	 	 	 	 	 	 	 dDd(Zs	 d<	 	 	 	 	 	 	 dEd)Zt	 	 dF	 	 	 	 	 	 	 dGd*Zu	 dH	 	 	 	 	 	 	 dId+Zv	 	 	 	 	 dJ	 	 	 	 	 	 	 	 	 	 	 	 	 dKd,Zw	 	 	 dL	 	 	 	 	 dMd-Zx	 	 dN	 	 	 	 	 	 	 	 	 dOd.Zyh d/ZzdPdQd0Z{	 	 	 	 dR	 	 	 	 	 	 	 	 	 	 	 dSd1Z|dTd2Z}dUd3Z~	 	 	 	 	 	 dVd4Z	 	 dW	 	 	 	 	 	 	 dXd5Zy)Yzl
Generic data algorithms. This module is experimental at the moment and not
intended for public consumption
    )annotationsN)dedent)TYPE_CHECKINGLiteralcast)algos	hashtableiNaTlib)AnyArrayLike	ArrayLikeAxisIntDtypeObjTakeIndexernpt)doc)find_stack_level)'construct_1d_object_array_from_listlikenp_find_common_type)ensure_float64ensure_objectensure_platform_intis_array_likeis_bool_dtypeis_complex_dtypeis_dict_likeis_extension_array_dtypeis_float_dtype
is_integeris_integer_dtypeis_list_likeis_object_dtypeis_signed_integer_dtypeneeds_i8_conversion)concat_compat)BaseMaskedDtypeCategoricalDtypeExtensionDtypeNumpyEADtype)ABCDatetimeArrayABCExtensionArrayABCIndexABCMultiIndex	ABCSeriesABCTimedeltaArray)isnana_value_for_dtype)take_nd)arrayensure_wrapped_if_datetimelikeextract_array)validate_indices)ListLikeNumpySorterNumpyValueArrayLike)CategoricalIndexSeries)BaseMaskedArrayExtensionArrayc                   t        | t              st        | d      } t        | j                        rt        t        j                  |             S t        | j                  t              rBt        d|       } | j                  st        | j                        S t        j                  |       S t        | j                  t              rt        d|       } | j                  S t        | j                        rdt        | t        j                         r$t        j                  |       j#                  d      S t        j                  |       j%                  dd      S t'        | j                        rt        j                  |       S t)        | j                        r8| j                  j*                  dv rt-        |       S t        j                  |       S t/        | j                        rt        t        j                   |       S t1        | j                        r-| j#                  d	      }t        t        j                   |      }|S t        j                  | t2        
      } t        |       S )a  
    routine to ensure that our data is of the correct
    input dtype for lower-level routines

    This will coerce:
    - ints -> int64
    - uint -> uint64
    - bool -> uint8
    - datetimelike -> i8
    - datetime64tz -> i8 (in local tz)
    - categorical -> codes

    Parameters
    ----------
    values : np.ndarray or ExtensionArray

    Returns
    -------
    np.ndarray
    Textract_numpyr=   r:   uint8Fcopy)         i8dtype)
isinstancer-   r5   r"   rJ   r   npasarrayr&   r   _hasna_ensure_data_datar'   codesr   ndarrayviewastyper    r   itemsizer   r   r$   object)valuesnpvaluess     M/var/www/horilla/myenv/lib/python3.12/site-packages/pandas/core/algorithms.pyrO   rO   j   s   , fm,vT:v||$RZZ/00	FLL/	2'0}}  --zz&!!	FLL"2	3 mV,||	v||	$fbjj)::f%**733 ::f%,,W5,AA	&,,	'zz&!!		% <<  K/!&))zz&!!	&,,	'BJJ'' 
V\\	*;;t$

H- ZZf-F      c                    t        | t              r| j                  |k(  r| S t        |t        j                        s%|j	                         }|j                  | |      } | S | j                  |d      } | S )z
    reverse of _ensure_data

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    dtype : np.dtype or ExtensionDtype
    original : AnyArrayLike

    Returns
    -------
    ExtensionArray or np.ndarray
    rI   FrC   )rK   r+   rJ   rL   construct_array_type_from_sequencerT   )rW   rJ   originalclss       rY   _reconstruct_datar`      sr      &+,1FeRXX& ((*##F%#8
 M u51MrZ   c                b   t        | t        t        t        t        j
                  f      s|dk7  r't        j                  | dt        t                      t        j                  | d      }|dv r(t        | t              rt        |       } t        |       } | S t	        j                  |       } | S )z5
    ensure that we are arraylike if not already
    isin-targetsz with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.
stacklevelFskipna)mixedstringmixed-integer)rK   r,   r.   r+   rL   rR   warningswarnFutureWarningr   r   infer_dtypetuplelistr   rM   )rW   	func_nameinferreds      rY   _ensure_arraylikerr      s     fx4ErzzRS&MM+ " " +- ??6%8;;&%(f<VDF M ZZ'FMrZ   )
complex128	complex64float64float32uint64uint32uint16rB   int64int32int16int8rh   rV   c                H    t        |       } t        |       }t        |   }|| fS )z
    Parameters
    ----------
    values : np.ndarray

    Returns
    -------
    htable : HashTable subclass
    values : ndarray
    )rO   _check_object_for_strings_hashtables)rW   ndtyper	   s      rY   _get_hashtable_algor     s-     &!F&v.FF#IfrZ   c                n    | j                   j                  }|dk(  rt        j                  | d      rd}|S )z
    Check if we can use string hashtable instead of object hashtable.

    Parameters
    ----------
    values : ndarray

    Returns
    -------
    str
    rV   Fre   rh   )rJ   namer   is_string_array)rW   r   s     rY   r   r     s7     \\F ve4FMrZ   c                    t        |       S )a3
  
    Return unique values based on a hash table.

    Uniques are returned in order of appearance. This does NOT sort.

    Significantly faster than numpy.unique for long enough sequences.
    Includes NA values.

    Parameters
    ----------
    values : 1d array-like

    Returns
    -------
    numpy.ndarray or ExtensionArray

        The return can be:

        * Index : when the input is an Index
        * Categorical : when the input is a Categorical dtype
        * ndarray : when the input is a Series/ndarray

        Return numpy.ndarray or ExtensionArray.

    See Also
    --------
    Index.unique : Return unique values from an Index.
    Series.unique : Return unique values of Series object.

    Examples
    --------
    >>> pd.unique(pd.Series([2, 1, 3, 3]))
    array([2, 1, 3])

    >>> pd.unique(pd.Series([2] + [1] * 5))
    array([2, 1])

    >>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")]))
    array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')

    >>> pd.unique(
    ...     pd.Series(
    ...         [
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...         ]
    ...     )
    ... )
    <DatetimeArray>
    ['2016-01-01 00:00:00-05:00']
    Length: 1, dtype: datetime64[ns, US/Eastern]

    >>> pd.unique(
    ...     pd.Index(
    ...         [
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...         ]
    ...     )
    ... )
    DatetimeIndex(['2016-01-01 00:00:00-05:00'],
            dtype='datetime64[ns, US/Eastern]',
            freq=None)

    >>> pd.unique(np.array(list("baabc"), dtype="O"))
    array(['b', 'a', 'c'], dtype=object)

    An unordered Categorical will return categories in the
    order of appearance.

    >>> pd.unique(pd.Series(pd.Categorical(list("baabc"))))
    ['b', 'a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    >>> pd.unique(pd.Series(pd.Categorical(list("baabc"), categories=list("abc"))))
    ['b', 'a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    An ordered Categorical preserves the category ordering.

    >>> pd.unique(
    ...     pd.Series(
    ...         pd.Categorical(list("baabc"), categories=list("abc"), ordered=True)
    ...     )
    ... )
    ['b', 'a', 'c']
    Categories (3, object): ['a' < 'b' < 'c']

    An array of tuples

    >>> pd.unique(pd.Series([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]).values)
    array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object)
    )unique_with_mask)rW   s    rY   uniquer   3  s    | F##rZ   c                    t        |       dk(  ryt        |       } t        j                  | j	                         j                  d            dk7  j                         }|S )aH  
    Return the number of unique values for integer array-likes.

    Significantly faster than pandas.unique for long enough sequences.
    No checks are done to ensure input is integral.

    Parameters
    ----------
    values : 1d array-like

    Returns
    -------
    int : The number of unique values in ``values``
    r   intp)lenrO   rL   bincountravelrT   sum)rW   results     rY   nunique_intsr     sO     6{a&!Fkk&,,.//78A=BBDFMrZ   c                   t        | d      } t        | j                  t              r| j	                         S | }t        |       \  }}  |t        |             }|*|j	                  |       }t        ||j                  |      }|S |j	                  | |      \  }}t        ||j                  |      }|J ||j                  d      fS )z?See algorithms.unique for docs. Takes a mask for masked arrays.r   rp   maskbool)	rr   rK   rJ   r(   r   r   r   r`   rT   )rW   r   r^   r	   tableuniquess         rY   r   r     s    v:F&,,/}}H+F3Ivc&k"E|,,v&#GX^^XF V$7#GX^^XFF+++rZ   i@B c                   t        |       s"t        dt        |       j                   d      t        |      s"t        dt        |      j                   d      t	        |t
        t        t        t        j                  f      sUt        |      }t        |d      }t        |      dkD  rc|j                  j                  dv rKt        |       s@t!        |      }n4t	        |t"              rt        j$                  |      }nt'        |dd      }t        | d	      }t'        |d
      }t	        |t        j                        s|j)                  |      S t+        |j                        rt-        |      j)                  |      S t+        |j                        r:t/        |j                        s%t        j0                  |j2                  t4              S t+        |j                        rt)        ||j7                  t8                    S t	        |j                  t:              r2t)        t        j<                  |      t        j<                  |            S t        |      t>        kD  rBt        |      dk  r4|j                  t8        k7  r!tA        |      jC                         rd }nZd }nVtE        |j                  |j                        }|j7                  |d      }|j7                  |d      }tF        jH                  } |||      S )z
    Compute the isin boolean array.

    Parameters
    ----------
    comps : list-like
    values : list-like

    Returns
    -------
    ndarray[bool]
        Same length as `comps`.
    zIonly list-like objects are allowed to be passed to isin(), you passed a ``rb   r   r   iufcbT)rA   extract_rangeisinr@   rI      c                    t        j                  t        j                  | |      j                         t        j                  |             S N)rL   
logical_orr   r   isnan)cvs     rY   fzisin.<locals>.f  s.    }}RWWQ]%8%8%:BHHQKHHrZ   c                J    t        j                  | |      j                         S r   )rL   r   r   )abs     rY   <lambda>zisin.<locals>.<lambda>  s    RWWQ]002 rZ   FrC   )%r!   	TypeErrortype__name__rK   r,   r.   r+   rL   rR   ro   rr   r   rJ   kindr#   r   r-   r3   r5   r   r$   pd_arrayr"   zerosshaper   rT   rV   r(   rM   _MINIMUM_COMP_ARR_LENr0   anyr   htableismember)compsrW   orig_valuescomps_arrayr   commons         rY   r   r     sj    ((,U(<(<'=Q@
 	
 ((,V(=(='>aA
 	

 fx4ErzzRS6l";.I K!O!!W,+E2 =[IF	FM	*&!vTN#EV<K4@Kk2::.''	[..	/$))&11	V\\	*?;CTCT3Uxx))66	V\\	*Kv!677	FLL.	1BJJ{+RZZ-?@@ 	K00K2' <I 3A %V\\;3D3DEvE2!((e(<OO[&!!rZ   c                   | }| j                   j                  dv rt        }t        |       \  }}  ||xs t	        |             }|j                  | d|||      \  }}	t        ||j                   |      }t        |	      }	|	|fS )a(  
    Factorize a numpy array to codes and uniques.

    This doesn't do any coercion of types or unboxing before factorization.

    Parameters
    ----------
    values : ndarray
    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.
    size_hint : int, optional
        Passed through to the hashtable's 'get_labels' method
    na_value : object, optional
        A value in `values` to consider missing. Note: only use this
        parameter when you know that you don't have any values pandas would
        consider missing in the array (NaN for float data, iNaT for
        datetimes, etc.).
    mask : ndarray[bool], optional
        If not None, the mask is used as indicator for missing values
        (True = missing, False = valid) instead of `na_value` or
        condition "val != val".

    Returns
    -------
    codes : ndarray[np.intp]
    uniques : ndarray
    mM)na_sentinelna_valuer   	ignore_na)rJ   r   r
   r   r   	factorizer`   r   )
rW   use_na_sentinel	size_hintr   r   r^   
hash_klassr   r   rQ   s
             rY   factorize_arrayr   $  s    H H||D 
 ,V4Jy/CK0E__! % NGU  BG&E'>rZ   z    values : sequence
        A 1-D sequence. Sequences that aren't pandas objects are
        coerced to ndarrays before factorization.
    zt    sort : bool, default False
        Sort `uniques` and shuffle `codes` to maintain the
        relationship.
    zG    size_hint : int, optional
        Hint to the hashtable sizer.
    )rW   sortr   c                   t        | t        t        f      r| j                  ||      S t	        | d      } | }t        | t
        t        f      r%| j                  | j                  |      \  }}||fS t        | t        j                        s| j                  |      \  }}nt        j                  |       } |s\| j                  t        k(  rIt        |       }|j                         r.t        | j                  d      }t        j                   |||       } t#        | ||      \  }}|r!t%        |      d	kD  rt'        |||d
d      \  }}t)        ||j                  |      }||fS )aN  
    Encode the object as an enumerated type or categorical variable.

    This method is useful for obtaining a numeric representation of an
    array when all that matters is identifying distinct values. `factorize`
    is available as both a top-level function :func:`pandas.factorize`,
    and as a method :meth:`Series.factorize` and :meth:`Index.factorize`.

    Parameters
    ----------
    {values}{sort}
    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.

        .. versionadded:: 1.5.0
    {size_hint}
    Returns
    -------
    codes : ndarray
        An integer ndarray that's an indexer into `uniques`.
        ``uniques.take(codes)`` will have the same values as `values`.
    uniques : ndarray, Index, or Categorical
        The unique valid values. When `values` is Categorical, `uniques`
        is a Categorical. When `values` is some other pandas object, an
        `Index` is returned. Otherwise, a 1-D ndarray is returned.

        .. note::

           Even if there's a missing value in `values`, `uniques` will
           *not* contain an entry for it.

    See Also
    --------
    cut : Discretize continuous-valued array.
    unique : Find the unique value in an array.

    Notes
    -----
    Reference :ref:`the user guide <reshaping.factorize>` for more examples.

    Examples
    --------
    These examples all show factorize as a top-level method like
    ``pd.factorize(values)``. The results are identical for methods like
    :meth:`Series.factorize`.

    >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"))
    >>> codes
    array([0, 0, 1, 2, 0])
    >>> uniques
    array(['b', 'a', 'c'], dtype=object)

    With ``sort=True``, the `uniques` will be sorted, and `codes` will be
    shuffled so that the relationship is the maintained.

    >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"),
    ...                               sort=True)
    >>> codes
    array([1, 1, 0, 2, 1])
    >>> uniques
    array(['a', 'b', 'c'], dtype=object)

    When ``use_na_sentinel=True`` (the default), missing values are indicated in
    the `codes` with the sentinel value ``-1`` and missing values are not
    included in `uniques`.

    >>> codes, uniques = pd.factorize(np.array(['b', None, 'a', 'c', 'b'], dtype="O"))
    >>> codes
    array([ 0, -1,  1,  2,  0])
    >>> uniques
    array(['b', 'a', 'c'], dtype=object)

    Thus far, we've only factorized lists (which are internally coerced to
    NumPy arrays). When factorizing pandas objects, the type of `uniques`
    will differ. For Categoricals, a `Categorical` is returned.

    >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c'])
    >>> codes, uniques = pd.factorize(cat)
    >>> codes
    array([0, 0, 1])
    >>> uniques
    ['a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    Notice that ``'b'`` is in ``uniques.categories``, despite not being
    present in ``cat.values``.

    For all other pandas objects, an Index of the appropriate type is
    returned.

    >>> cat = pd.Series(['a', 'a', 'c'])
    >>> codes, uniques = pd.factorize(cat)
    >>> codes
    array([0, 0, 1])
    >>> uniques
    Index(['a', 'c'], dtype='object')

    If NaN is in the values, and we want to include NaN in the uniques of the
    values, it can be achieved by setting ``use_na_sentinel=False``.

    >>> values = np.array([1, 2, 1, np.nan])
    >>> codes, uniques = pd.factorize(values)  # default: use_na_sentinel=True
    >>> codes
    array([ 0,  1,  0, -1])
    >>> uniques
    array([1., 2.])

    >>> codes, uniques = pd.factorize(values, use_na_sentinel=False)
    >>> codes
    array([0, 1, 0, 2])
    >>> uniques
    array([ 1.,  2., nan])
    )r   r   r   r   )r   )r   F)compat)r   r   r   T)r   assume_uniqueverify)rK   r,   r.   r   rr   r*   r/   freqrL   rR   rM   rJ   rV   r0   r   r1   wherer   r   	safe_sortr`   )	rW   r   r   r   r^   rQ   r   	null_maskr   s	            rY   r   r   b  sV   p &8Y/0T?KKv=FH 	6,.?@AKK#  ))t)4wg~

+))/)Jw F#6<<6#9
 VI}}-fll5I)Xv>(+
w Gq "+
  BG'>rZ   c                l    t        j                  dt        t                      t	        | |||||      S )aK  
    Compute a histogram of the counts of non-null values.

    Parameters
    ----------
    values : ndarray (1-d)
    sort : bool, default True
        Sort by values
    ascending : bool, default False
        Sort in ascending order
    normalize: bool, default False
        If True then compute a relative histogram
    bins : integer, optional
        Rather than count values, group them into half-open bins,
        convenience for pd.cut, only works with numeric data
    dropna : bool, default True
        Don't include counts of NaN

    Returns
    -------
    Series
    zupandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.rc   )r   	ascending	normalizebinsdropna)rj   rk   rl   r   value_counts_internal)rW   r   r   r   r   r   s         rY   value_countsr   /  s@    < MM	E#% ! rZ   c                   ddl m}m} t        | dd       }|rdnd}	|ddlm}
 t        | |      r| j                  } 	  |
| |d      }|j                  |
      }|	|_
        ||j                  j                            }|j                  j                  d      |_        |j                         }|r,|j                  dk(  j                         r|j                   dd }t#        j$                  t'        |      g      }n t)        |       rz || d      j                  j                  |
      }|	|_
        ||j                  _
        |j                  }t        |t"        j*                        st#        j,                  |      }n{t        | t.              rot1        t3        | j4                              } || |	      j7                  ||      j9                         }| j:                  |j                  _        |j                  }nt=        | d      } t?        | |      \  }}}|j@                  t"        jB                  k(  r|j                  t"        jD                        } ||      }|j@                  tF        k(  r)|j@                  tH        k(  r|j                  tH              }nL|j@                  |j@                  k7  r3|j@                  dk7  r$tK        jL                  dtN        tQ                      ||_
         ||||	d      }|r|jS                  |      }|r||jU                         z  }|S # t        $ r}t        d	      |d }~ww xY w)Nr   )r;   r<   r   
proportioncount)cutT)include_lowestz+bins argument only works with numeric data.r   intervalFrC   )indexr   )levelr   r   r   zstring[pyarrow_numpy]zThe behavior of value_counts with object-dtype is deprecated. In a future version, this will *not* perform dtype inference on the resulting index. To retain the old behavior, use `result.index = result.index.infer_objects()`rc   )r   r   rD   )r   )+pandasr;   r<   getattrpandas.core.reshape.tiler   rK   _valuesr   r   r   r   notnarT   
sort_indexallilocrL   r3   r   r   rR   rM   r-   ro   rangenlevelsgroupbysizenamesrr   value_counts_arraylikerJ   float16rv   r   rV   rj   rk   rl   r   sort_valuesr   )rW   r   r   r   r   r   r;   r<   
index_namer   r   iierrr   countslevelskeys_idxs                      rY   r   r   ^  s   
 .J$<'D0ff%^^F	TVT$7B
 /**,-||**:6""$ v~~*//1[[1%F 3r7)$ $F+F/77DDFDSFFK *FLL^^Ffbjj1F+.%/0FV$/vf5 
 "(FLL^^F 'vHF4VVDOD&!zzRZZ'{{2::. +CyyD TZZ6%9jj(		TZZ'II!88D "/1 "CHF#DuEF##i#8&**,&MS  	TIJPSS	Ts   L( (	M1L==Mc                    | }t        |       } t        j                  | ||      \  }}}t        |j                        r|r|t
        k7  }||   ||   }}t        ||j                  |      }|||fS )z
    Parameters
    ----------
    values : np.ndarray
    dropna : bool
    mask : np.ndarray[bool] or None, default None

    Returns
    -------
    uniques : np.ndarray
    counts : np.ndarray[np.int64]
    r   )rO   r   value_countr$   rJ   r
   r`   )rW   r   r   r^   r   r   
na_counterres_keyss           rY   r   r     sx     H&!F%11&&tLD&*8>>* 4<D:vd|&D x~~x@HVZ''rZ   c                H    t        |       } t        j                  | ||      S )ax  
    Return boolean ndarray denoting duplicate values.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Array over which to check for duplicate values.
    keep : {'first', 'last', False}, default 'first'
        - ``first`` : Mark duplicates as ``True`` except for the first
          occurrence.
        - ``last`` : Mark duplicates as ``True`` except for the last
          occurrence.
        - False : Mark all duplicates as ``True``.
    mask : ndarray[bool], optional
        array indicating which elements to exclude from checking

    Returns
    -------
    duplicated : ndarray[bool]
    )keepr   )rO   r   
duplicated)rW   r   r   s      rY   r   r     s#    2 &!FV$T::rZ   c                   t        | d      } | }t        | j                        r)t        |       } t	        d|       } | j                  |      S t        |       } t        j                  | ||      \  }}|||fS 	 t        j                  |      }t        ||j                  |      }|S # t        $ r,}t        j                  d| t                      Y d}~Id}~ww xY w)	a  
    Returns the mode(s) of an array.

    Parameters
    ----------
    values : array-like
        Array over which to check for duplicate values.
    dropna : bool, default True
        Don't consider counts of NaN/NaT.

    Returns
    -------
    np.ndarray or ExtensionArray
    moder   r>   r   )r   r   NzUnable to sort modes: rc   )rr   r$   rJ   r4   r   _moderO   r   r   rL   r   r   rj   rk   r   r`   )rW   r   r   r^   npresultres_maskr   r   s           rY   r   r     s    " v8FH6<<(/7&/||6|**&!FVFFHh!!
778$ xBFM  
$SE*')	
 	

s   ;B) )	C2"CCc           	     
   t        | j                        }t        |       } | j                  dk(  rt	        j
                  | |||||      }|S | j                  dk(  rt	        j                  | ||||||      }|S t        d      )a  
    Rank the values along a given axis.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Array whose values will be ranked. The number of dimensions in this
        array must not exceed 2.
    axis : int, default 0
        Axis over which to perform rankings.
    method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
        The method by which tiebreaks are broken during the ranking.
    na_option : {'keep', 'top'}, default 'keep'
        The method by which NaNs are placed in the ranking.
        - ``keep``: rank each NaN value with a NaN ranking
        - ``top``: replace each NaN with either +/- inf so that they
                   there are ranked at the top
    ascending : bool, default True
        Whether or not the elements should be ranked in ascending order.
    pct : bool, default False
        Whether or not to the display the returned rankings in integer form
        (e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1).
       )is_datetimeliketies_methodr   	na_optionpctrE   )axisr  r  r   r  r  z&Array with ndim > 2 are not supported.)r$   rJ   rO   ndimr   rank_1drank_2dr   )rW   r  methodr  r   r  r  rankss           rY   rankr  +  s    > *&,,7O&!F{{a+
* L 
	+
 L @AArZ   c                x   t        | t        j                  t        t        t
        f      s$t        j                  dt        t                      t        |       st        j                  |       } t        |      }|r+t        || j                  |          t        | ||d|      }|S | j!                  ||      }|S )ak	  
    Take elements from an array.

    Parameters
    ----------
    arr : array-like or scalar value
        Non array-likes (sequences/scalars without a dtype) are coerced
        to an ndarray.

        .. deprecated:: 2.1.0
            Passing an argument other than a numpy.ndarray, ExtensionArray,
            Index, or Series is deprecated.

    indices : sequence of int or one-dimensional np.ndarray of int
        Indices to be taken.
    axis : int, default 0
        The axis over which to select values.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any other
          negative values raise a ``ValueError``.

    fill_value : any, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type (``self.dtype.na_value``) is used.

        For multi-dimensional `arr`, each *element* is filled with
        `fill_value`.

    Returns
    -------
    ndarray or ExtensionArray
        Same type as the input.

    Raises
    ------
    IndexError
        When `indices` is out of bounds for the array.
    ValueError
        When the indexer contains negative values other than ``-1``
        and `allow_fill` is True.

    Notes
    -----
    When `allow_fill` is False, `indices` may be whatever dimensionality
    is accepted by NumPy for `arr`.

    When `allow_fill` is True, `indices` should be 1-D.

    See Also
    --------
    numpy.take : Take elements from an array along an axis.

    Examples
    --------
    >>> import pandas as pd

    With the default ``allow_fill=False``, negative numbers indicate
    positional indices from the right.

    >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1])
    array([10, 10, 30])

    Setting ``allow_fill=True`` will place `fill_value` in those positions.

    >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True)
    array([10., 10., nan])

    >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True,
    ...      fill_value=-10)
    array([ 10,  10, -10])
    zpd.api.extensions.take accepting non-standard inputs is deprecated and will raise in a future version. Pass either a numpy.ndarray, ExtensionArray, Index, or Series instead.rc   T)r  
allow_fill
fill_value)r  )rK   rL   rR   r+   r,   r.   rj   rk   rl   r   r   rM   r   r6   r   r2   take)arrindicesr  r  r  r   s         rY   r  r  k  s    j cBJJ(98YOP8 ')	
 jjo!'*G#))D/2t
 M '-MrZ   c                   |t        |      }t        | t        j                        r(| j                  j
                  dv rt        |      st        |      rt        j                  | j                  j                        }t        |      rt        j                  |g      nt        j                  |      }||j                  k\  j                         r*||j                  k  j                         r| j                  }n|j                  }t        |      r t        t        |j                  |            }n't!        t        t"        |      |      }nt%        |       } | j'                  |||      S )a  
    Find indices where elements should be inserted to maintain order.

    Find the indices into a sorted array `arr` (a) such that, if the
    corresponding elements in `value` were inserted before the indices,
    the order of `arr` would be preserved.

    Assuming that `arr` is sorted:

    ======  ================================
    `side`  returned index `i` satisfies
    ======  ================================
    left    ``arr[i-1] < value <= self[i]``
    right   ``arr[i-1] <= value < self[i]``
    ======  ================================

    Parameters
    ----------
    arr: np.ndarray, ExtensionArray, Series
        Input array. If `sorter` is None, then it must be sorted in
        ascending order, otherwise `sorter` must be an array of indices
        that sort it.
    value : array-like or scalar
        Values to insert into `arr`.
    side : {'left', 'right'}, optional
        If 'left', the index of the first suitable location found is given.
        If 'right', return the last such index.  If there is no suitable
        index, return either 0 or N (where N is the length of `self`).
    sorter : 1-D array-like, optional
        Optional array of integer indices that sort array a into ascending
        order. They are typically the result of argsort.

    Returns
    -------
    array of ints or int
        If value is array-like, array of insertion points.
        If value is scalar, a single integer.

    See Also
    --------
    numpy.searchsorted : Similar method from NumPy.
    iurI   )sidesorter)r   rK   rL   rR   rJ   r   r   r    iinfor   r3   minr   maxr   intr   r   r4   searchsorted)r  valuer  r  r  	value_arrrJ   s          rY   r  r    s   ` $V, 	3

#IINNd""25"9 ()3E):BHHeW%	"'')yEII/E.J.J.L IIEOOEeejj/0ET)U35AE -S1 EV<<rZ   >   r}   r|   r{   rz   rv   ru   c                \   t        |      }t        j                  }| j                  }t	        |      }|rt
        j                  }nt
        j                  }t        |t              r| j                         } | j                  }t        | t        j                        s|t        | d|j                   d      rA|dk7  r$t        dt        |       j                   d|        || | j!                  |            S t#        t        |       j                   d      d}| j                  j$                  dv r*t        j&                  }| j)                  d      } t*        }d	}nZ|rt        j,                  }nG|j$                  d
v r9| j                  j.                  dv rt        j0                  }nt        j2                  }| j4                  }|dk(  r| j7                  dd      } t        j                  |      }t        j8                  | j:                  |      }	t=        d      gdz  }
|dk\  rt=        d|      nt=        |d      |
|<   ||	t?        |
      <   | j                  j.                  t@        v rtC        jD                  | |	|||       nt=        d      gdz  }|dk\  rt=        |d      nt=        d|      ||<   t?        |      }t=        d      gdz  }|dkD  rt=        d|       nt=        | d      ||<   t?        |      } || |   | |         |	|<   |r|	j)                  d      }	|dk(  r	|	dddf   }	|	S )aQ  
    difference of n between self,
    analogous to s-s.shift(n)

    Parameters
    ----------
    arr : ndarray or ExtensionArray
    n : int
        number of periods
    axis : {0, 1}
        axis to shift on
    stacklevel : int, default 3
        The stacklevel for the lost dtype warning.

    Returns
    -------
    shifted
    __r   zcannot diff z	 on axis=zK has no 'diff' method. Convert to a suitable dtype prior to calling 'diff'.Fr   rH   Tr  )r}   r|   r  r   rI   NrE   )datetimelikeztimedelta64[ns])#r  rL   nanrJ   r   operatorxorsubrK   r)   to_numpyrR   hasattrr   
ValueErrorr   shiftr   r   rz   rS   r
   object_r   rv   ru   r  reshapeemptyr   slicern   _diff_specialr   diff_2d)r  nr  narJ   is_boolopis_timedelta	orig_ndimout_arr
na_indexer_res_indexerres_indexer_lag_indexerlag_indexers                  rY   diffr;  ;  s   ( 	AA	BIIEE"G\\\\%&lln		c2::&3"R[[M,-qy <S	0B0B/C9TF!STTc399Q<((9%%& 'G G 
 L
yy~~hhtn	

	t	
 99>>..JJEJJEIA~kk"a  HHUOEhhsyy.G+"J)*auT1~U1d^Jt!#GE*
yy~~& 	c7At,G d}q(/0AvU1d^5q>TL)d}q(01AU4!_5!T?TL)!#k"2C4DE,,01A~!Q$-NrZ   c                8   t        | t        j                  t        t        f      st        d      d}t        | j                  t              s&t        j                  | d      dk(  rt        |       }n"	 | j                         }| j                  |      }||S t%        |      st        d      t'        t        j(                  |            }|s+t+        t-        |             t+        |       k(  st/        d      |Jt1        |       \  }}  |t+        |             }|j3                  |        t'        |j5                  |            }|rG|j                         }	|r$|t+        |        k  |t+        |       k\  z  }
d||
<   nd}
t7        |	|d	
      }nt        j8                  t+        |      t:              }|j=                  |t        j>                  t+        |                   |j                  |d      }|r(|d	k(  }
|r!|
|t+        |        k  z  |t+        |       k\  z  }
|r
t        j@                  ||
d	       |t'        |      fS # t
        t        j                  f$ r: | j                  rt        | d   t               rt#        |       }nt        |       }Y w xY w)a  
    Sort ``values`` and reorder corresponding ``codes``.

    ``values`` should be unique if ``codes`` is not None.
    Safe for use with mixed types (int, str), orders ints before strs.

    Parameters
    ----------
    values : list-like
        Sequence; must be unique if ``codes`` is not None.
    codes : np.ndarray[intp] or None, default None
        Indices to ``values``. All out of bound indices are treated as
        "not found" and will be masked with ``-1``.
    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.
    assume_unique : bool, default False
        When True, ``values`` are assumed to be unique, which can speed up
        the calculation. Ignored when ``codes`` is None.
    verify : bool, default True
        Check if codes are out of bound for the values and put out of bound
        codes equal to ``-1``. If ``verify=False``, it is assumed there
        are no out of bound codes. Ignored when ``codes`` is None.

    Returns
    -------
    ordered : AnyArrayLike
        Sorted ``values``
    new_codes : ndarray
        Reordered ``codes``; returned when ``codes`` is not None.

    Raises
    ------
    TypeError
        * If ``values`` is not list-like or if ``codes`` is neither None
        nor list-like
        * If ``values`` cannot be sorted
    ValueError
        * If ``codes`` is not None and ``values`` contain duplicates.
    zbOnly np.ndarray, ExtensionArray, and Index objects are allowed to be passed to safe_sort as valuesNFre   ri   r   zMOnly list-like objects or None are allowed to be passed to safe_sort as codesz,values should be unique if codes is not Noner   r  rI   wrap)r   )!rK   rL   rR   r+   r,   r   rJ   r(   r   rm   _sort_mixedargsortr  decimalInvalidOperationr   rn   _sort_tuplesr!   r   rM   r   r   r'  r   map_locationslookupr2   r+  r  putarangeputmask)rW   rQ   r   r   r   r  orderedr   torder2r   	new_codesreverse_indexers                rY   r   r     sZ   ` frzz+<hGH/
 	

 F v||^4OOF51_Df%	.^^%Fkk&)G }.
 	
  

5 12EVF^!4F!CGHH~
 18
Fs6{#	$QXXg%67!S[L(Uc&k-ABDE$KDFEb9	((3v;c:FBIIc&k$:; $((V(<	B;DuF|34V8LM4+


9dB''	222{ 7334 
	. {{z&)U; 'v.%f-
	.s   4!I AJJc           	     V   t        j                  | D cg c]  }t        |t               c}t              }t        j                  | D cg c]  }t        |       c}t              }| | z  }t        j                  | |         }t        j                  | |         }|j                         d   j                  |      }|j                         d   j                  |      }|j                         d   }	t        j                  |||	g      }
| j                  |
      S c c}w c c}w )z3order ints before strings before nulls in 1d arraysrI   r   )
rL   r3   rK   strr   r0   r@  nonzeror  concatenate)rW   xstr_posnull_posnum_posstr_argsortnum_argsortstr_locsnum_locs	null_locslocss              rY   r?  r?  .  s    hhF;q
1c*;4HGxx&1Qa1>Hh("G**VG_-K**VG_-K #((5H #((5H  "1%I>>8Xy9:D;;t <1s   D!D&c                P    ddl m} ddlm}  || d      \  }} ||d      }| |   S )a  
    Convert array of tuples (1d) to array of arrays (2d).
    We need to keep the columns separately as they contain different types and
    nans (can't use `np.sort` as it may fail when str and nan are mixed in a
    column as types cannot be compared).
    r   )	to_arrays)lexsort_indexerNT)orders)"pandas.core.internals.constructionr]  pandas.core.sortingr^  )rW   r]  r^  arraysr   indexers         rY   rC  rC  =  s0     =3&$'IFAfT2G'?rZ   c                   ddl m} t        j                         5  t        j                  ddt
               t        | d      }t        |d      }ddd       j                  d	      \  }}t        j                  |j                  |j                        } |||j                  d
d      }t        | t              r0t        |t              r | j                  |      j                         }n[t        | t               r| j"                  } t        |t               r|j"                  }t%        | |g      }t        |      }t'        |      }|j)                  |      j                  }t        j*                  ||      S # 1 sw Y   ,xY w)a  
    Extracts the union from lvals and rvals with respect to duplicates and nans in
    both arrays.

    Parameters
    ----------
    lvals: np.ndarray or ExtensionArray
        left values which is ordered in front.
    rvals: np.ndarray or ExtensionArray
        right values ordered after lvals.

    Returns
    -------
    np.ndarray or ExtensionArray
        Containing the unsorted union of both arrays.

    Notes
    -----
    Caller is responsible for ensuring lvals.dtype == rvals.dtype.
    r   r<   ignorez<The behavior of value_counts with object-dtype is deprecated)categoryFr   Nr=  r  )r   rJ   rD   )r   r<   rj   catch_warningsfilterwarningsrl   r   alignrL   maximumrW   r   rK   r-   appendr   r,   r   r%   r4   reindexrepeat)	lvalsrvalsr<   l_countr_countfinal_countunique_valscombinedrepeatss	            rY   union_with_duplicatesrw  L  s;   . 		 	 	" 	= 	J"	

 (e<'e<	= }}W};GW**W^^W^^<KGMMUSK%'Jum,Lll5)002eX&MMEeX&MME !%0X&4[A!!+.55G99['**7	= 	=s   7E<<Fc                  	 |dvrd| d}t        |      t        |      rYt        |t              rt	        |d      r|		fd}n5ddlm} t        |      dk(  r ||t        j                        }n ||      }t        |t              rU|d	k(  r||j                  j                            }|j                  j                  |       }t        |j                  |      }|S t        |       s| j!                         S | j#                  t$        d
      }|t'        j(                  |||      S t'        j*                  ||t-        |      j/                  t        j0                        |      S )a  
    Map values using an input mapping or function.

    Parameters
    ----------
    mapper : function, dict, or Series
        Mapping correspondence.
    na_action : {None, 'ignore'}, default None
        If 'ignore', propagate NA values, without passing them to the
        mapping correspondence.
    convert : bool, default True
        Try to find better dtype for elementwise function results. If
        False, leave as dtype=object.

    Returns
    -------
    Union[ndarray, Index, ExtensionArray]
        The output of the mapping function applied to the array.
        If the function returns a tuple with more than one element
        a MultiIndex will be returned.
    )Nrf  z+na_action must either be 'ignore' or None, z was passed__missing__c                |    t        | t              r't        j                  |       rt        j                     S |    S r   )rK   floatrL   r   r!  )rR  dict_with_defaults    rY   r   zmap_array.<locals>.<lambda>  s1    0$Q.288A;  DE  rZ   r   re  rI   rf  FrC   )convert)r   r}  )r'  r   rK   dictr&  r   r<   r   rL   ru   r.   r   r   get_indexerr2   r   rD   rT   rV   r   	map_infermap_infer_maskr0   rS   rB   )
r  mapper	na_actionr}  msgr<   rc  
new_valuesrW   r|  s
            @rY   	map_arrayr    s@   6 ((;I;kRo
 Ffd#(F !'F &6{abjj9&)$ FLL..01F ,,**3/V^^W5
s8xxz ZZUZ+F}}VVW==!!Ff!2!2288!<g
 	
rZ   )rW   r   return
np.ndarray)rW   r   rJ   r   r^   r   r  r   )rp   rO  r  r   )rW   r  )rW   r  r  rO  )rW   r   r  r  r   )r   npt.NDArray[np.bool_] | None)r   r7   rW   r7   r  npt.NDArray[np.bool_])TNNN)rW   r  r   r   r   
int | Noner   rV   r   r  r  z'tuple[npt.NDArray[np.intp], np.ndarray])FTN)r   r   r   r   r   r  r  z%tuple[np.ndarray, np.ndarray | Index])TFFNT)
r   r   r   r   r   r   r   r   r  r<   )rW   r  r   r   r   r  r  z,tuple[ArrayLike, npt.NDArray[np.int64], int])firstN)rW   r   r   zLiteral['first', 'last', False]r   r  r  r  )TN)rW   r   r   r   r   r  r  r   )r   averager   TF)rW   r   r  r   r
  rO  r  rO  r   r   r  r   r  znpt.NDArray[np.float64])r   FN)r  r   r  r   r  r   )leftN)
r  r   r  z$NumpyValueArrayLike | ExtensionArrayr  zLiteral['left', 'right']r  zNumpySorter | Noner  znpt.NDArray[np.intp] | np.intp)r   )r/  r  r  r   )NTFT)rW   zIndex | ArrayLikerQ   znpt.NDArray[np.intp] | Noner   r   r   r   r   r   r  z.AnyArrayLike | tuple[AnyArrayLike, np.ndarray])r  r   )rW   r  r  r  )ro  ArrayLike | Indexrp  r  r  r  )NT)r  r   r  zLiteral['ignore'] | Noner}  r   r  z#np.ndarray | ExtensionArray | Index)__doc__
__future__r   rA  r"  textwrapr   typingr   r   r   rj   numpyrL   pandas._libsr   r	   r   r
   r   pandas._typingr   r   r   r   r   r   pandas.util._decoratorsr   pandas.util._exceptionsr   pandas.core.dtypes.castr   r   pandas.core.dtypes.commonr   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   pandas.core.dtypes.concatr%   pandas.core.dtypes.dtypesr&   r'   r(   r)   pandas.core.dtypes.genericr*   r+   r,   r-   r.   r/   pandas.core.dtypes.missingr0   r1   pandas.core.array_algos.taker2   pandas.core.constructionr3   r   r4   r5   pandas.core.indexersr6   r7   r8   r9   r   r:   r;   r<   pandas.core.arraysr=   r>   rO   r`   rr   Complex128HashTableComplex64HashTableFloat64HashTableFloat32HashTableUInt64HashTableUInt32HashTableUInt16HashTableUInt8HashTableInt64HashTableInt32HashTableInt16HashTableInt8HashTableStringHashTablePyObjectHashTabler   r   r   r   r   r   unique1dr   r   r   r   r   r   r   r   r   r  r  r  r-  r;  r   r?  rC  rw  r   rZ   rY   <module>r     s   #    
     ( 4    " 4  
 1 
 2  
K!\&2>B8 ,,**&&&&$$$$$$""""""""  $$&&$$6^$B.,0  " X"z ! )-;;; ; 	;
 '; -;| 	 
	
 	0   	t
t t 	t
 +t-,tr 	,
, , 	, , ,b 	a
a a 	a a aL LP(( $(,H(1(B -4)-;;
); '; 	;< RV))#)2N))\ 88
8 8 	8
 8 
8 8F mm m 	mp &,!%	Q=	Q=/Q= #Q= 	Q=
 $Q=p Jgf *. 33&3 3 	3
 3 43D4+4+%64+4+t +/	P
	P
 (P
 	P

 )P
rZ   