
    Owg_g                     
   d dl m Z  d dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZmZ d Zd Zd Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zej6                  j9                  dddg      d        Zd Zd Zd Z d Z!ej6                  j9                  ddddgfdd dgfg      d        Z"d Z#d  Z$ej6                  j9                  d!g d"fg d#fg d$fg d%fg d&d"fg d&d%fg d&d$fg d&d%fg      d'        Z%d( Z&d) Z'd* Z(d+ Z)d, Z*d- Z+y).    )datetimeN)
ArrowDtype)	DataFrameIndex
MultiIndexSeries_testingc                     t        dt        j                  dg|       }t        j                  t
        d      5  |j                  j                  dd        d d d        y # 1 sw Y   y xY w)NfooBAD__barBADfoodtypezexpand must be True or Falsematch.*(BAD[_]+).*(BAD)expand)r   npnanpytestraises
ValueErrorstrextract)any_string_dtypevaluess     X/var/www/horilla/myenv/lib/python3.12/site-packages/pandas/tests/strings/test_extract.py+test_extract_expand_kwarg_wrong_type_raisesr      sW    %rvvu5=MNF	z)G	H >

/=> > >s   A!!A*c                 b   t        dt        j                  dg|       }t        dt        j                  t        j                  g|       }|j                  j                  d      }t        j                  ||       |j                  j                  dd      }t        j                  ||       t        ddgt        j                  t        j                  gt        j                  t        j                  gg|       }|j                  j                  d	d
      }t        j                  ||       y )Nr   r   r   BAD__z.*(BAD[_]+).*Tr   BADr   F)r   r   r   r   r   r   tmassert_frame_equal)r   sexpectedresults       r   test_extract_expand_kwargr'      s     "&&%08HIA'2662662:JKHUU]]?+F&(+UU]]?4]8F&(+
5	BFFBFF+bffbff-=>FVH UU]]/]>F&(+    c                     t        dt        j                  ddt        j                         dd ddg	      } | j
                  j                  dd	      }t        j                  t        j                  g}t        d
dg|d
dg||||||g	t              }t        j                  ||       | j
                  j                  dd	      }t        d
t        j                  d
t        j                  t        j                  t        j                  d t        j                  t        j                  g	t              }t        j                  ||       y )NaBAD_BAD	BAD_b_BADTr             @r   Fr   BAD_r!   r   z.*(BAD[_]+).*BAD)r   r   r   r   todayr   r   r   objectr"   r#   assert_series_equal)serr&   err%   s       r   &test_extract_expand_False_mixed_objectr4   *   s    
	RVV[$0@%qRUVC
 WW__1%_@F
&&"&&	B
%"vuor2r2r2FfH &(+ WW__/_>F	rvvrvvNH 68,r(   c                      t        g d      } d}t        j                  t        |      5  | j                  j                  dd       d d d        y # 1 sw Y   y xY w)N)A1A2A3A4B5z,only one regex group is supported with Indexr   ([AB])([123])Fr   )r   r   r   r   r   r   )idxmsgs     r    test_extract_expand_index_raisesr>   @   sK     .
/C
8C	z	- 767 7 7s   AAc                 T    | g d|      }d}t        j                  t        |      5  |j                  j	                  dd       d d d        t        j                  t        |      5  |j                  j	                  dd       d d d        y # 1 sw Y   KxY w# 1 sw Y   y xY w)	Nr6   B2C3r   "pattern contains no capture groupsr   
[ABC][123]Fr   
(?:[AB]).*r   r   r   r   r   index_or_seriesr   s_or_idxr=   s       r   ,test_extract_expand_no_capture_groups_raisesrJ   J   s    19IJH
.C 
z	- 9\%89 
z	- 9\%89 9	9 99 9   B+BBB'c                      | ddg|      }|j                   j                  dd      } | ddgd|	      }| t        k(  rt        j                  ||       y t        j
                  ||       y )
Nr6   r7   r   (?P<uno>A)\dFr   Aunonamer   )r   r   r   r"   r1   assert_index_equalrH   r   rI   r&   r%   s        r   (test_extract_expand_single_capture_grouprT   W   sg    d|3CDH\\!!/%!@FSz=MNH& 
vx0
fh/r(   c                 	   t        g d|       }|j                  j                  dd      }t        t        j                  t        j                  t        j                  g|       }t        j                  ||       |j                  j                  dd      }t        t        j                  t        j                  gt        j                  t        j                  gt        j                  t        j                  gg|       }t        j                  ||       |j                  j                  dd      }t        dd	t        j                  g|       }t        j                  ||       |j                  j                  d
d      }t        ddgd	dgt        j                  t        j                  gg|       }t        j                  ||       |j                  j                  dd      }t        dd	t        j                  gd|       }t        j                  ||       |j                  j                  dd      }t        ddgd	dgt        j                  t        j                  ggddg|       }t        j                  ||       |j                  j                  dd      }t        ddgd	dgt        j                  t        j                  ggddg|       }t        j                  ||       |j                  j                  dd      }t        dd	t        j                  g|       }t        j                  ||       t        g d|       }|j                  j                  dd      }t        ddgd	dgt        j                  t        j                  gg|       }t        j                  ||       t        g d|       }|j                  j                  dd      }t        ddgd	dgt        j                  dggddg|       }t        j                  ||       t        g d|       }|j                  j                  dd      }t        ddgd	dgdt        j                  ggddg|       }t        j                  ||       y )Nr@   r   (_)Fr   (_)(_)([AB])[123]rN   Br;   12(?P<letter>[AB])letterrP   !(?P<letter>[AB])(?P<number>[123])numbercolumnsr   ([AB])(?P<number>[123])r   ([AB])(?:[123])A11B22C33([AB])([123])(?:[123])r6   rA   3"(?P<letter>[AB])?(?P<number>[123])rj   r6   rA   C#(?P<letter>[ABC])(?P<number>[123])?rm   )	r   r   r   r   r   r"   r1   r   r#   r   r$   r&   r%   s       r   "test_extract_expand_capture_groupsrp   c   s   !)9:AUU]]5]/Frvvrvvrvv.6FGH68, UU]]8E]2F
&&"&&	BFFBFF+bffbff-=>FVH &(+ UU]]=]7FsC(0@AH68, UU]]?5]9F
sc3Z"&&"&&!12:JH &(+ UU]]-e]<FsC(x?OPH68, UU]]>u]MF
sc3Z"&&"&&!128$H
 &(+ UU]]4U]CF
sc3Z"&&"&&!12HH
 &(+ UU]],U];FsC(0@AH68, 	$,<=AUU]]3E]BF
sc3Z"&&"&&!12:JH &(+ 	 (89AUU]]?]NF
sc3Z"&&#/8$H
 &(+ 	 (89AUU]]@]OF
sc3Z#rvv/8$H
 &(+r(   c                 d   g d}t        |       dk(  rt        j                  d       t        |       t        |      k  r)| j                  d      } t        |       t        |      k  r)| d t        |       } t	        || |      }|j
                  j                  dd      }t	        d	d
t        j                  g| |      }t        j                  ||       |j
                  j                  dd      }t        dd	gdd
gdt        j                  ggddg| |      }t        j                  ||       y )Nrl   r   zTest requires len(index) > 0   indexr   (\d)Fr   rZ   r[   (?P<letter>\D)(?P<number>\d)?rN   rY   rm   r]   r_   ra   rt   r   )lenr   skiprepeatr   r   r   r   r   r"   r1   r   r#   )rt   r   datar2   r&   r%   s         r   (test_extract_expand_capture_groups_indexr|      s    D
5zQ23
e*s4y
 Q e*s4y
  +CIE
U*:
;CWW__WU_3FsC(=MNH68,WW__=e_LF
sc3Z#rvv/8$	H &(+r(   c                     t        g dd|       }|j                  j                  dd      }t        g dd|       }t        j                  ||       y )	Na3b3c2bobrP   z(?P<sue>[a-z])Fr   abcsue)r   r   r   r"   r1   ro   s       r   ,test_extract_single_series_name_is_preservedr      sH    !5EFAUU]],U];FoE9IJH68,r(   c                 B   t        dt        j                  dg|       }|j                  j	                  dd      }t        ddgt        j                  t        j                  gt        j                  t        j                  gg|       }t        j                  ||       y )	Nr   r   r   r   Tr   r    r!   )r   r   r   r   r   r   r"   r#   ro   s       r   test_extract_expand_Truer      sz     "&&%08HIAUU]]/]=F
5	BFFBFF+bffbff-=>FVH &(+r(   c                  N   t         j                  t         j                  g} t        dt         j                  ddt        j                         dd ddg	      }|j
                  j                  dd      }t        d	d
g| d	d
g| | | | | | g	t              }t        j                  ||       y )Nr*   r+   Tr   r,   r-   r   r   r.   r!   r   )r   r   r   r   r/   r   r   r   r0   r"   r#   )r3   mixedr&   r%   s       r   %test_extract_expand_True_mixed_objectr      s    
&&"&&	BFFNN
	
E YY3DAF
%"vuor2r2r2FfH &(+r(   c                 T    | g d|      }d}t        j                  t        |      5  |j                  j	                  dd       d d d        t        j                  t        |      5  |j                  j	                  dd       d d d        y # 1 sw Y   KxY w# 1 sw Y   y xY w)	Nr@   r   rC   r   rD   Tr   rE   rF   rG   s       r   4test_extract_expand_True_single_capture_group_raisesr      s    
 19IJH
.C	z	- 8\$78 
z	- 8\$78 8	8 88 8rK   c                      | ddg|      }|j                   j                  dd      }t        dddgi|      }t        j                  ||       y )	Nr6   r7   r   rM   Tr   rO   rN   )r   r   r   r"   r#   rS   s        r   -test_extract_expand_True_single_capture_groupr   	  sR    d|3CDH\\!!/$!?F%#s,4DEH&(+r(   rQ   series_namec                    t        g d| |      }|j                  j                  dd      }t        t        j
                  t        j
                  t        j
                  g|      }t        j                  ||       |j                  j                  dd      }t        t        j
                  t        j
                  gt        j
                  t        j
                  gt        j
                  t        j
                  gg|      }t        j                  ||       |j                  j                  dd      }t        d	d
t        j
                  g|      }t        j                  ||       |j                  j                  dd      }t        d	dgd
dgt        j
                  t        j
                  gg|      }t        j                  ||       |j                  j                  dd      }t        dd	d
t        j
                  gi|      }t        j                  ||       |j                  j                  dd      }t        d	dgd
dgt        j
                  t        j
                  ggddg|      }t        j                  ||       |j                  j                  dd      }t        d	dgd
dgt        j
                  t        j
                  ggddg|      }t        j                  ||       |j                  j                  dd      }t        d	d
t        j
                  g|      }t        j                  ||       y )Nr@   rP   rV   Tr   r   rW   rX   rN   rY   r;   rZ   r[   r\   r]   r^   r_   r`   rb   r   rc   r   r   r   r   r   r   r"   r#   )rQ   r   r$   r&   r%   s        r   test_extract_seriesr     s    	!4DEA UU]]5].F"&&"&&"&&19IJH&(+ UU]]8D]1F
&&"&&	BFFBFF+bffbff-=>FVH &(+ UU]]=]6F#sBFF+3CDH&(+ UU]]?4]8F
sc3Z"&&"&&!12:JH &(+ UU]]-d];F(S#rvv$67?OPH&(+ UU]]>t]LF
sc3Z"&&"&&!128$H
 &(+ UU]]4T]BF
sc3Z"&&"&&!12HH
 &(+ UU]],T]:F#sBFF+3CDH&(+r(   c                    t        g d|       }|j                  j                  dd      }t        ddgdd	gt        j
                  t        j
                  gg|       }t        j                  ||       t        g d
|       }|j                  j                  dd      }t        ddgdd	gt        j
                  dggddg|       }t        j                  ||       t        g d|       }|j                  j                  dd      }t        ddgdd	gdt        j
                  ggddg|       }t        j                  ||       y )Nrd   r   rh   Tr   rN   rZ   rY   r[   ri   rk   rj   r]   r_   r`   rl   rn   rm   r   ro   s       r   test_extract_optional_groupsr   K  s5   $,<=AUU]]3D]AF
sc3Z"&&"&&!12:JH &(+ 	 (89AUU]]?]MF
sc3Z"&&#/8$H
 &(+ 	 (89AUU]]@]NF
sc3Z#rvv/8$H
 &(+r(   c                    g d}t        |       t        |      k  r"t        j                  dt        |       d       | d t        |       } t        || |      }|j                  j                  dd      }t        dd	t        j                  g| |      }t        j                  ||       |j                  j                  d
d      }t        ddgdd	gdt        j                  ggddg| |      }t        j                  ||       y )Nrl   zIndex needs more than z valuesrs   ru   Tr   rZ   r[   rv   rN   rY   rm   r]   r_   rw   )rx   r   ry   r   r   r   r   r   r   r"   r#   )rt   r   r{   r$   r&   r%   s         r   +test_extract_dataframe_capture_groups_indexr   i  s     D
5zCI,SYKw?@+CIEt5(89AUU]]74]0F#sBFF+5@PQH&(+UU]];D]IF
sc3Z#rvv/8$	H &(+r(   c                     t        g dd|       }|j                  j                  dd      }t        dg di| 	      }t	        j
                  ||       y )
Nr~   r   rP   (?P<letter>[a-z])Tr   r]   r   r   )r   r   r   r   r"   r#   ro   s       r   'test_extract_single_group_returns_framer     sM     	!=MNAUU]]/]=F(O4<LMH&(+r(   c                 :   dddddt         j                  dg}g d}d}g d	}t        || 
      }t        j                  g dd      }t        |||| 
      }|j                  j                  |t        j                        }t        j                  ||       t        j                  g d      }	t        ||	|       }t        j                  g dd      }t        |||| 
      }|j                  j                  |t        j                        }t        j                  ||       t        ||	|       }d|j                  _        d|_        t        |||| 
      }|j                  j                  |t        j                        }t        j                  ||       y )Nzdave@google.comztdhock5@gmail.comzmaudelaperriere@gmail.comz'rob@gmail.com some text steve@gmail.comz%a@b.com some text c@d.com and e@f.com ))davegooglecom)tdhock5gmailr   )maudelaperrierer   r   )robr   r   )stever   r   )r   r   r   )r   dr   )efr   zY
    (?P<user>[a-z0-9]+)
    @
    (?P<domain>[a-z]+)
    \.
    (?P<tld>[a-z]{2,4})
    )userdomaintldr   )r   r   r,   r   rr   r   )   r   )r   r,   )   r   )r   r,   )r   rr   Nr   names)flags))singleDave)r   Toby)r   Maude)multiplerobAndSteve)r   abcdef)nonemissing)r   emptyrs   ))r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r,   )r   r   r   )r   r   r,   )r   r   rr   )NNr   )matchesdescription)r   r   r   )r   r   r   r   from_tuplesr   r   
extractallreVERBOSEr"   r#   rt   r   )
r   r{   expected_tuplespatexpected_columnsr$   expected_indexr%   r&   mis
             r   test_extractallr     s   #1/

D	OC 1t+,A  ++HN )9AQH UUc4F&(+ 
			


B 	t2%56A++		
 $N )9AQH UUc4F&(+ 	t2%56A.AGGM>N)9AQH UUc4F&(+r(   zpat,expected_namesrk   r]   r_   z([AB])?(?P<number>[123])c                    t        g d|      }|j                  j                  |       }t        dt        j
                  dft        j
                  dfgt        j                  g dd      ||	      }t        j                  ||       y )
N)r   r6   32r   )rN   rZ   rj   r[   )r   r   rr   r,   r   r   )rt   ra   r   )
r   r   r   r   r   r   r   r   r"   r#   )r   expected_namesr   r$   r&   r%   s         r   test_extractall_column_namesr     ss     	'78AUUc"F	bffc]RVVSM2$$%=_U	H &(+r(   c                 ^   t        g dd|       }t        j                  g dd      }|j                  j	                  d      }t        dg d	i|| 
      }t        j                  ||       |j                  j	                  d      }t        g d	|| 
      }t        j                  ||       y )Nr   r   d4c2r   rP   r   r   r   r   r   r   r   r]   )r   r   r   r   rs   ([a-z]))r   r   r   r   r   r   r"   r#   )r   r$   r   r&   r%   s        r   test_extractall_single_groupr     s    #-?OPA++(N
 UU23F	'(FVH &(+ UUj)FN:JH &(+r(   c                     t        g dd|       }|j                  j                  d      }t        g dt	        j
                  g dd      | 	      }t        j                  ||       y )
N)ab3abc3d4cd2r   rP   z([a-z]+))ababcr   cdr   r   r   rs   )r   r   r   r   r   r   r"   r#   ro   s       r   ,test_extractall_single_group_with_quantifierr     s_     	'mCSTAUUk*F $$,O
 H &(+r(   zdata, names)N)i1)Ni2)r   r   r   c                   	 t        |       	t        |      dk(  rt        t        	      |d         }n+	fdt        	      D        }t        j                  ||      }t        | d||      }t        j                  g |dz         }|j                  j                  d	      }t        dg||
      }t        j                  ||       |j                  j                  d      }t        ddg||
      }t        j                  ||       |j                  j                  d      }t        dg||
      }t        j                  ||       |j                  j                  d      }t        ddg||
      }t        j                  ||       |j                  j                  d      }t        ddg||
      }t        j                  ||       y )Nr,   r   rQ   c              3   B   K   | ]  }t        |gd z
  z          yw)r,   N)tuple).0ins     r   	<genexpr>z-test_extractall_no_matches.<locals>.<genexpr>0  s      91%q1u&9s   r   r   rQ   rt   r   r   z(z)rw   z(z)(z)z(?P<first>z)firstz(?P<first>z)(?P<second>z)secondz(z)(?P<second>z))rx   r   ranger   r   r   r   r   r   r"   r#   )
r{   r   r   rt   tuplesr$   r   r&   r%   r   s
            @r   test_extractall_no_matchesr     s    	D	A
5zQeAhU1X.9a9&&vU;t-u<LMA++Buz7IKN UUe$F!NBRSH&(+ UUh'F!Q~EUVH&(+ UUn-F	7GH &(+ UU9:F(#>AQH &(+ UU01FH^;KH &(+r(   c                    t        g dd|       }|j                  j                  d      }t        dg dit	        j
                  g dd dg	      | 
      }t        j                  ||       | dk(  r]t        g dt              t        g ddt              fD ]3  }|j                  j                  d      }t        j                  ||       5 t        g ddt        g dd      |       }|j                  j                  d      }t        dg dit	        j
                  g dddg	      | 
      }t        j                  ||       y )N)a1a2b1c1xxxrP   z[ab](?P<digit>\d)digit)rZ   r[   rZ   )r   )r   r,   r   r   r   rs   r0   r   s_name)XXyyzzidx_namer   r   ))r   r   )r   r,   )r   r   )
r   r   r   r   r   r   r"   r#   r   r0   )r   r$   r&   r%   r<   s        r   test_extractall_stringindexr   U  s5   #%7GHAUU23F	/"$$%=dG_UH
 &(+ 8#&f5&U&A
 	4C WW''(<=F!!&(3	4 	&Z8		A UU23F	/"$$-j'5J
 H &(+r(   c                     t        g dd|       }t        j                  t        d      5  |j                  j                  d       d d d        y # 1 sw Y   y xY w)Nr   r   rP   zno capture groupsr   z[a-z])r   r   r   r   r   r   )r   r$   s     r   (test_extractall_no_capture_groups_raisesr   z  sJ     	#-?OPA	z)<	= #	"# # #s   AAc                  \   t        g dg dd      } | j                  j                  j                  dd      }t	        g d      }t        j                  ||       | j                  j                  j                  d	d      }g d
}t	        |ddg      }t        j                  ||       y )Nr   )r8   B3D4r   )rt   rQ   z([A-Z])Tr   )rN   rY   Dz!(?P<letter>[A-Z])(?P<digit>[0-9])))rN   rj   )rY   rj   )r   4r]   r   )ra   )r   rt   r   r   r   r"   r#   )r$   rr   e_lists       r   !test_extract_index_one_two_groupsr    s    #+=MRA	Jt4A/"A!Q
 	
@NA1F&8W"56A!Qr(   c                 <   t        g dd|       }d}|j                  j                  |d      }|j                  j                  |      }|j	                  dd	      }t        j                  ||       d
}|j                  j                  |d      }|j                  j                  |      }|j	                  dd	      }t        j                  ||       d}|j                  j                  |d      }	|j                  j                  |      }|j	                  dd	      }t        j                  |	|       d}
|j                  j                  |
d      }|j                  j                  |
      }|j	                  dd	      }t        j                  ||       y )Nr~   r   rP   ([a-z])([0-9])Tr   r   r   level!(?P<letter>[a-z])(?P<digit>[0-9])(?P<group_name>[a-z])r   )r   r   r   r   xsr"   r#   )r   r$   pattern_two_nonameextract_two_nonamehas_multi_indexno_multi_indexpattern_two_namedextract_two_namedpattern_one_namedextract_one_namedpattern_one_nonameextract_one_nonames               r   test_extractall_same_as_extractr    sv   !=MNA*'9$Gee&&'9:O$'''9N,n=<&7Eee&&'89O$'''9N+^<0&7Eee&&'89O$'''9N+^<#'9$Gee&&'9:O$'''9N,n=r(   c                 p   t        j                  g dd      }t        g d|d|       }d}|j                  j	                  |d	      }|j                  j                  |      }|j                  d
d      }t        j                  ||       d}|j                  j	                  |d	      }|j                  j                  |      }|j                  d
d      }t        j                  ||       d}	|j                  j	                  |	d	      }
|j                  j                  |	      }|j                  d
d      }t        j                  |
|       d}|j                  j	                  |d	      }|j                  j                  |      }|j                  d
d      }t        j                  ||       y )N))rN   r   )rY   r   )rm   third)capitalordinalr   r~   r   )rt   rQ   r   r  Tr   r   r   r  r	  r
  r   )	r   r   r   r   r   r   r  r"   r#   )r   r   r$   r  r  has_match_indexno_match_indexr  r  r  r  r  r  s                r   -test_extractall_same_as_extract_subject_indexr    s   			9$
B 	!-GWXA*'9$Gee&&'9:O$'''9N,n=<&7Eee&&'89O$'''9N+^<0&7Eee&&'89O$'''9N+^<#'9$Gee&&'9:O$'''9N,n=r(   c                      t        j                  d      } t        ddgt        | j	                                     j
                  j                  d      }|j                  d   dk(  sJ y )Npyarrowr   r   r   z(ab)r   zstring[pyarrow])r   importorskipr   r   stringr   r   dtypes)par&   s     r   test_extractall_preserves_dtyper$    sY     
		Y	'BUDMBIIK)@AEEPPQWXF==0000r(   ),r   r   numpyr   r   pandas.core.dtypes.dtypesr   pandasr   r   r   r   r	   r"   r   r'   r4   r>   rJ   rT   rp   r|   r   r   r   r   r   markparametrizer   r   r   r   r   r   r   r   r   r   r   r  r  r  r$   r(   r   <module>r+     s|    	   0 >,"-,7
9	0P,f,6-,,.8, $!676, 86,r,<,4,R,j  
.(/CD	#a]3	
,
,,*,  	W	W	\	\	w'	|,	|,	|,	(,(,V",J# >8>B1r(   