
    Owg%                    P   d Z ddlmZ ddlZddlmZ ddlZddlm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZmZ erdd	lmZmZmZ dd
lmZmZ ddlmZmZmZmZ dZ 	 	 	 	 	 	 ddZ!dde df	 	 	 	 	 	 	 	 	 	 	 ddZ"de f	 	 	 	 	 	 	 ddZ#de df	 	 	 	 	 	 	 	 	 ddZ$de df	 	 	 	 	 	 	 	 	 ddZ%y)z"
data hash pandas / numpy objects
    )annotationsN)TYPE_CHECKING)hash_object_array)is_list_like)CategoricalDtype)ABCDataFrameABCExtensionArrayABCIndexABCMultiIndex	ABCSeries)HashableIterableIterator)	ArrayLikenpt)	DataFrameIndex
MultiIndexSeries0123456789123456c                
   	 t        |       }t        j                  |g|       } t        j                  d      }t        j                  |      t        j                  d      z   }d}t        |       D ]4  \  }}||z
  }||z  }||z  }|t        j                  d|z   |z         z  }|}6 |dz   |k(  sJ d       |t        j                  d      z  }|S # t        $ r( t        j                  g t        j                        cY S w xY w)	z
    Parameters
    ----------
    arrays : Iterator[np.ndarray]
    num_items : int

    Returns
    -------
    np.ndarray[uint64]

    Should be the same as CPython's tupleobject.c
    )dtypeiCB ixV4 r   iXB    zFed in wrong num_itemsi| )	nextStopIterationnparrayuint64	itertoolschain
zeros_like	enumerate)	arrays	num_itemsfirstmultoutlast_iia	inverse_is	            O/var/www/horilla/myenv/lib/python3.12/site-packages/pandas/core/util/hashing.pycombine_hash_arraysr-   /   s    -V __eWf-F99WD
--
8!4
4CF&! 1M	qt		%)+i788 A:"<$<<"299UCJ!  -xx")),,-s   C .DDTutf8c                f    ddl m} t        t         t              r |t               dd      S t         t              r7t         j                        j                  dd      } || dd      }|S t         t              rtt         j                        j                  dd      }|r1 fdd	D        }t        j                  |g|      }	t        |	d
      } || j                  dd      }|S t         t              rfd j!                         D        }
t#         j$                        }|r2 fdd	D        }|dz  }t        j                  |
|      }d |D        }
t        |
|      } || j                  dd      }|S t'        dt)                      )a>  
    Return a data hash of the Index/Series/DataFrame.

    Parameters
    ----------
    obj : Index, Series, or DataFrame
    index : bool, default True
        Include the index in the hash (if Series/DataFrame).
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    Series of uint64, same length as the object

    Examples
    --------
    >>> pd.util.hash_pandas_object(pd.Series([1, 2, 3]))
    0    14639053686158035780
    1     3869563279212530728
    2      393322362522515241
    dtype: uint64
    r   )r   r   F)r   copyr0   )indexr   r0   c              3  f   K   | ](  }t        j                  d       j                   * ywF)r2   encodinghash_key
categorizeNhash_pandas_objectr2   _values.0_r7   r5   r6   objs     r,   	<genexpr>z%hash_pandas_object.<locals>.<genexpr>   s?      	  #II%%) '	   .1N   c              3  T   K   | ]  \  }}t        |j                         ! y wrA   )
hash_arrayr:   )r<   r=   seriesr7   r5   r6   s      r,   r?   z%hash_pandas_object.<locals>.<genexpr>   s,      
6 v~~x:F
s   %(c              3  f   K   | ](  }t        j                  d       j                   * ywr4   r8   r;   s     r,   r?   z%hash_pandas_object.<locals>.<genexpr>   s?      	$  #II%%) '	$r@   r   c              3      K   | ]  }|  y wrA    )r<   xs     r,   r?   z%hash_pandas_object.<locals>.<genexpr>   s     )Aa)s   zUnexpected type for hashing )pandasr   _default_hash_key
isinstancer   hash_tuplesr
   rD   r:   astyper   r   r    r-   r2   r   itemslencolumns	TypeErrortype)r>   r2   r5   r6   r7   r   hser
index_iterr#   hashesr$   index_hash_generator_hashess   ` ```         r,   r9   r9   S   s   F $#}%k#x:(QVWW	C	"s{{Hh
CJJ5 K 
 Qc>d Ja 
C	#s{{Hh
CJJ5 K 
 	  	J __aS*5F#FA.AQciixeD< J9 
C	&
 YY[
 $		$  	$  NI  oof.BCG))F	2QciixeD J 6tCykBCC    c           
        t        |       st        d      ddlm}m} t        | t              s |j                  |       }n| }t        |j                        D cg c]9  }|j                  |j                  |   t        |j                  |   d            ; }}fd|D        }t        |t        |            }	|	S c c}w )a  
    Hash an MultiIndex / listlike-of-tuples efficiently.

    Parameters
    ----------
    vals : MultiIndex or listlike-of-tuples
    encoding : str, default 'utf8'
    hash_key : str, default _default_hash_key

    Returns
    -------
    ndarray[np.uint64] of hashed values
    z'must be convertible to a list-of-tuplesr   )Categoricalr   F
categoriesorderedc              3  F   K   | ]  }|j                  d         yw)Fr5   r6   r7   N)_hash_pandas_object)r<   catr5   r6   s     r,   r?   zhash_tuples.<locals>.<genexpr>   s,       	HQVWs   !)r   rR   rJ   r\   r   rL   r   from_tuplesrangenlevels_simple_newcodesr   levelsr-   rP   )
valsr5   r6   r\   r   milevelcat_valsrW   rT   s
    ``       r,   rM   rM      s    $ ABB
 dM*#Z##D) 2::&
 	 	HHUO		%(8%H	
H F 	FCM2AHs   >C c                   t        | d      st        d      t        | t              r| j	                  |||      S t        | t
        j                        s"t        dt        |       j                   d      t        | |||      S )a  
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray or ExtensionArray
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    ndarray[np.uint64, ndim=1]
        Hashed values, same length as the vals.

    Examples
    --------
    >>> pd.util.hash_array(np.array([1, 2, 3]))
    array([ 6238072747940578789, 15839785061582574730,  2185194620014831856],
      dtype=uint64)
    r   zmust pass a ndarray-likera   z6hash_array requires np.ndarray or ExtensionArray, not z!. Use hash_pandas_object instead.)
hasattrrR   rL   r	   rb   r   ndarrayrS   __name___hash_ndarray)rj   r5   r6   r7   s       r,   rD   rD      s    > 4!233$)*''Z ( 
 	
 dBJJ'DDz""##DF
 	

 x:>>rZ   c                f   | j                   }t        j                  |t        j                        r8t	        | j
                  |||      }t	        | j                  |||      }|d|z  z   S |t        k(  r| j                  d      } n"t        |j                  t        j                  t        j                  f      r#| j                  d      j                  dd      } nt        |j                  t        j                        rG|j                  dk  r8| j                  d| j                   j                         j                  d      } n`|rPdd	lm}m}m}	  |	| d
      \  }
}t)         ||      d      }|j+                  |
|      }|j-                  ||d      S 	 t/        | ||      } | | dz	  z  } | t        j6                  d      z  } | | dz	  z  } | t        j6                  d      z  } | | dz	  z  } | S # t0        $ r6 t/        | j                  t2              j                  t4              ||      } Y w xY w)z!
    See hash_array.__doc__.
       u8i8Fr1      ur   )r\   r   	factorize)sortr]   ra      l   e9z    l   b&&&	    )r   r   
issubdtype
complex128rr   realimagboolrN   
issubclassrS   
datetime64timedelta64viewnumberitemsizerJ   r\   r   ry   r   rg   rb   r   rR   strobjectr   )rj   r5   r6   r7   r   	hash_real	hash_imagr\   r   ry   rh   r^   rc   s                r,   rr   rr     s    JJE 
}}UBMM*!$))XxL	!$))XxL	2	>)) }{{4 	EJJ ?	@yy%%d%7	EJJ			*u~~/Byy1TZZ00123::4@
   !*$U ;E:$j0A5QE))%7C**!H +  	$T8X>D 	DBJDBII())DDBJDBII())DDBJDK  	$C ''/8D	s   G1 1<H0/H0)r#   zIterator[np.ndarray]r$   intreturnnpt.NDArray[np.uint64])r>   zIndex | DataFrame | Seriesr2   r   r5   r   r6   z
str | Noner7   r   r   r   )rj   z+MultiIndex | Iterable[tuple[Hashable, ...]]r5   r   r6   r   r   r   )
rj   r   r5   r   r6   r   r7   r   r   r   )
rj   z
np.ndarrayr5   r   r6   r   r7   r   r   r   )&__doc__
__future__r   r   typingr   numpyr   pandas._libs.hashingr   pandas.core.dtypes.commonr   pandas.core.dtypes.dtypesr   pandas.core.dtypes.genericr   r	   r
   r   r   collections.abcr   r   r   pandas._typingr   r   rJ   r   r   r   r   rK   r-   r9   rM   rD   rr   rH   rZ   r,   <module>r      sq   #     2 2 6   
  ' ! !-0!!L ,a	#aa a 	a
 a aL %/
5// / 	/h %	.?
.?.? .? 	.?
 .?f %	9
99 9 	9
 9rZ   