
    Owg%R                        d Z ddlmZ ddlmZ ddlZddlZddlm	Z	 ddl
mZmZmZ ddlmZ ej"                  j%                  d      Zej"                  j)                  d      Zej"                  j)                  d	      Zed
        Zd Zej"                  j3                  dg dddgg      d        Zej"                  j3                  dddg      d        Zed        Zd Zed        Zej"                  j3                  ddddgidfddgddgddfddgddgddfg      d         Z ed!d"d#g      Z eej"                  j3                  d$dddgid%g d&d'd% e d(d)       e d(d*       e d(d+       e d,d-       e d.d/       e d.d0      gd'g      d1               Z!eej"                  j3                  d$dddgid2g d&d'd2 e d(d)       e d(d*       e d(d+       e d,d-       e d.d/       e d.d0      gd'g      d3               Z"eej"                  j3                  d$dddgid2g d&d'd2 e d(d)       e d(d*       e d(d+       e d,d-       e d.d/       e d.d0      gd'g      d4               Z#ed5        Z$ed6        Z%ed7        Z&ed8        Z'ej"                  j3                  d9d:d;g      d<        Z(eej"                  j3                  d$i ddig      d=               Z)ej"                  j3                  d>i g d?fd@g dAig dAfg      dB        Z*ej"                  j3                  dd(d,gdCg      dD        Z+edE        Z,eej"                  j3                  dFdG eg dHg ejZ                  g dI      J      fdK eg dHg ejZ                  g dL      J      fdM eg dNg ejZ                  g dO      J      fg      dP               Z.eej"                  j3                  dddgg      ej"                  j3                  dQddRdSgdSdRgdSdTgg      dU                      Z/edV        Z0edW        Z1edX        Z2edY        Z3edZ        Z4d[ Z5ed\        Z6ed]        Z7ed^        Z8d_ Z9y)`zx
Tests that the file header is properly handled or inferred
during parsing for all of the parsers defined in parsers.py
    )
namedtuple)StringION)ParserError)	DataFrameIndex
MultiIndexz=ignore:Passing a BlockManager to DataFrame:DeprecationWarningpyarrow_xfailpyarrow_skipc                     | }d}t        j                  t        |      5  t        d      }|j	                  |dg       d d d        y # 1 sw Y   y xY w)Nzbut only \d+ lines in filematchz,,
   header)pytestraises
ValueErrorr   read_csv)all_parsersparsermsgss       Y/var/www/horilla/myenv/lib/python3.12/site-packages/pandas/tests/io/parser/test_header.pytest_read_with_bad_headerr      sJ    F
'C	z	- (TN2$'( ( (    A		Ac                     | }d}t        j                  t        d      5  |j                  t	        |      d       d d d        y # 1 sw Y   y xY w)N$1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
zUPassing negative integer to header is invalid. For no header, use header=None insteadr   r   r   r   r   r   r   r   r   datas      r   test_negative_headerr"   '   sO    FD 
1
 3
 	r23 3 3   AAr   )r         c                     | }d}t        j                  t        d      5  |j                  t	        |      |       d d d        y # 1 sw Y   y xY w)Nz<1,2,3,4,5
        6,7,8,9,10
        11,12,13,14,15
        z8cannot specify multi-index header with negative integersr   r   r   )r   r   r   r!   s       r    test_negative_multi_index_headerr(   6   sO     FD 
T
 7 	v67 7 7r#   TFc                     | }d}d}t        j                  t        |      5  |j                  t	        |      |       d d d        y # 1 sw Y   y xY w)NzMyColumn
a
b
a
bz#Passing a bool to header is invalidr   r   )r   r   	TypeErrorr   r   )r   r   r   r!   r   s        r   test_bool_header_argr+   D   sN     FD 0C	y	, 7v67 7 7   AAc                     | }d}g d}|j                  t        |      |      }t        g dg dg dgg dg d      }t        j                  ||       y )	Nzfoo,1,2,3
bar,4,5,6
baz,7,8,9
ABCnames   r$      )r%         )      	   )foobarbazindexcolumnsr   r   r   tmassert_frame_equal)r   r   r!   r3   resultexpecteds         r   test_header_with_index_colrG   S   sW    FD E__Xd^5_9F	Iy)#H
 &(+    c                     | }d}d}|j                  t        |      dd      }|j                  t        |      dd      }t        j                  ||       y )Nzggot,to,ignore,this,line
got,to,ignore,this,line
index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
z7index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
r$   r   r   	index_colr   r   rC   rD   )r   r   r!   data2rE   rF   s         r   test_header_not_first_linerN   e   sW    FDE __Xd^A_CFxqAFH&(+rH   c                    | }d}|j                  t        |      g dddg      }d }t        d      D cg c]"  }t        d      D cg c]  } |||       c}$ }}}t        j                  t        d      D cg c]  }d	| 	 c}t        d      D cg c]  }d
| 	 c}gddg      }t        j                  t        d      D cg c]  }d| 	 c}t        d      D cg c]  }d| 	 c}t        d      D cg c]  }d| 	 c}t        d      D cg c]  }d| 	 c}gg d      }	t        ||	|      }
t        j                  ||
       y c c}w c c}}w c c}w c c}w c c}w c c}w c c}w c c}w )N  C0,,C_l0_g0,C_l0_g1,C_l0_g2

C1,,C_l1_g0,C_l1_g1,C_l1_g2
C2,,C_l2_g0,C_l2_g1,C_l2_g2
C3,,C_l3_g0,C_l3_g1,C_l3_g2
R0,R1,,,
R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2
R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2
R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2
R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2
R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2
r   r5   r$   r6   r   r5   rJ   c                     d|  d| S )NRr1    )rcs     r   <lambda>z)test_header_multi_index.<locals>.<lambda>   s    !AaSk rH   r7   r6   R_l0_gR_l1_gR0R1r2   C_l0_gC_l1_gC_l2_gC_l3_g)C0C1C2C3)rA   r@   )r   r   ranger   from_arraysr   rC   rD   )r   r   r!   rE   
data_gen_frU   rV   ir@   rA   rF   s              r   test_header_multi_indexrh   y   s`   FD __Xd^LQPQF_SF)J:?(CQuQx0!Z10CDC"" %a	)1F1#,	)%(+KQfQCL+KLTlE $$#(8,avaS\,#(8,avaS\,#(8,avaS\,#(8,avaS\,		
 'G we<H&(+ 1C	)+K
 -,,,s;   E	EE=E%E*
E/'E4
E9E>Ez
kwargs,msgrK   r<   r=   zLindex_col must only contain row numbers when specifying a multi-index headerr5   )rK   r3   z9cannot specify names when specifying a multi-index header)rK   usecolsz;cannot specify usecols when specifying a multi-index headerc                     d}| }t        j                  t        |      5   |j                  t	        |      fdg di| d d d        y # 1 sw Y   y xY w)NrP   r   r   rQ   r   )r   kwargsr   r!   r   s        r   test_header_multi_index_invalidrl      sS    ,D F	z	- GF|FvFG G Gs   !A

A
_TestTuplefirstsecondrk   r6   )aq)rq   rU   )rq   r   )bt)rV   u)rV   v)skiprowsr3   rq   rr   rU   r   rs   rt   rV   ru   rv   c                     | }t        g dg dgddgt        j                  g d            }d} |j                  t	        |      fdd	i|}t        j                  ||       y )
Nr5   r$   r6   r%   r7   r8   r9   r:   r;   r         onetworp   r?   zC,a,a,a,b,c,c
,q,r,s,t,u,v
,,,,,,
one,1,2,3,4,5,6
two,7,8,9,10,11,12rK   r   r   r   from_tuplesr   r   rC   rD   r   rk   r   rF   r!   rE   s         r   &test_header_multi_index_common_format1r      sk    < F	23en&&T
HD V__Xd^CqCFCF&(+rH   r$   c                     | }t        g dg dgddgt        j                  g d            }d} |j                  t	        |      fdd	i|}t        j                  ||       y )
Nry   rz   r}   r~   rp   r?   z<,a,a,a,b,c,c
,q,r,s,t,u,v
one,1,2,3,4,5,6
two,7,8,9,10,11,12rK   r   r   r   s         r   &test_header_multi_index_common_format2r      sk    < F	23en&&T
HD
 V__Xd^CqCFCF&(+rH   c                     | }t        g dg dgddgt        j                  g d            }|j                  d      }d	} |j                  t        |      fd
d i|}t        j                  ||       y )Nry   rz   r}   r~   rp   r?   T)drop2a,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12rK   )r   r   r   reset_indexr   r   rC   rD   r   s         r   &test_header_multi_index_common_format3r   *  s    < F	23en&&T
H ###.HD
 V__Xd^FtFvFF&(+rH   c           
         | }t        t        j                  g dg dgd      t        ddg      t	        g dg dgg d	g d
gddg            }d}|j                  t        |      ddgd      }t        j                  ||       y )Nr$   r6   r%   r7   r8   r:   r;   r   r{   r|   int64dtyper5   r9   rq   rs   rV   rU   r   rt   ru   rv   r   r   r5   r$   r$   r   r5   r$   r6   r%   rq   rr   levelscodesr3   r?   r   r   rJ   	r   nparrayr   r   r   r   rC   rD   r   r   rF   r!   rE   s        r   0test_header_multi_index_common_format_malformed1r   Z  s    F
/#56gFQFm#%>?"O4*
HD
 __Xd^QFa_HF(F+rH   c           
         | }t        t        j                  g dg dgd      t        ddg      t	        g dg dgg d	g d
gd dg            }d}|j                  t        |      ddgd      }t        j                  ||       y )Nr   r   r   r   r5   r9   r   r   r   r   rr   r   r?   1,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12r   rJ   r   r   s        r   0test_header_multi_index_common_format_malformed2r   o  s    F
/#56gFQFm#%>?"O4+
HD
 __Xd^QFa_HF(F+rH   c           
      .   | }t        t        j                  g dg dgd      t        ddgddggd	dgd	dgg
      t        g dg dgg dg dgd dg            }d}|j	                  t        |      d	dgd	dg      }t        j                  ||       y )N)r6   r%   r7   r8   )r;   r   r{   r|   r   r   r5   r9   r$   r:   r   )r   r   r   )r   rt   ru   rv   )r   r5   r$   r$   rQ   rr   r   r?   r   rJ   )r   r   r   r   r   r   rC   rD   r   s        r   0test_header_multi_index_common_format_malformed3r     s    F
,0@!Q!Q 0!Q!Q8HI#%9:.+
HD
 __Xd^QFq!f_MF(F+rH   c                     | }d d gddgddgg}t        j                  ddg      }t        ||      }d}|j                  t	        |      d	dg
      }t        j                  ||       y )Nr5   r$   r6   r%   )rq   r/   )rs   r0   rA   za,b
A,B
,
1,2
3,4r   r   )r   r   r   r   r   rC   rD   )r   r   r!   rA   rF   rE   s         r   "test_header_multi_index_blank_liner     sq     F4L1a&1a&)D$$j*%=>Gw/H"D__Xd^QF_;F(F+rH   zdata,header)1,2,3
4,5,6N)zfoo,bar,baz
1,2,3
4,5,6r   c                 2   | }|j                   dk(  r3|1t        j                  j                  d      }|j	                  |       |j                  t        d      g d      }|j                  t        |      g d|      }t        j                  ||       y )NpyarrowzDataFrame.columns are different)reasonr   r   r2   r3   r   )	enginer   markxfailapplymarkerr   r   rC   rD   )r   r!   r   requestr   r   rF   rE   s           r   !test_header_names_backward_compatr     s    
 F}}	!f&8{{  (I JD!x7OH__Xd^?6_RF&(+rH   c                     | }t        g d      } |j                  t        d      fi |}t        j                  ||       y )Nr   r   za,b,cr   r   r   rC   rD   )r   rk   r   rF   rE   s        r   test_read_only_header_no_rowsr     s=     F1HV__Xg.9&9F&(+rH   zkwargs,namesr   r3   )r<   r=   r>   quuxpandac                     | }d}t        g dg dg dg|      } |j                  t        |      fdd i|}t        j                  ||       y )Nr   )r5   r$   r6   r%   r7   )r8   r9   r:   r;   r   )r{   r|            r   r   r   )r   rk   r3   r   r!   rF   rE   s          r   test_no_headerr     sZ     FD 	*,@A5H V__Xd^CDCFCF&(+rH   string_headerc                     d}d}| }t        j                  t        |      5  |j                  t	        |      |       d d d        y # 1 sw Y   y xY w)Nz*header must be integer or list of integersz1,2
3,4r   r   r   )r   r   r   r!   r   s        r   test_non_int_headerr     sJ     7CDF	z	- 7v67 7 7r,   c                     d}| }t        ddgddgddgd      }|j                  t        |      dg      }t        j                  ||       y )Nza,b,c
0,1,2
1,2,3r   r5   r$   r6   r   r   r   )r   r!   r   rF   rE   s        r   test_singleton_headerr     sS     %DF1vQF!Q@AH__Xd^QC_8F&(+rH   zdata,expectedz#A,A,A,B
one,one,one,two
0,40,34,0.1)r   (   "   皙?)r/   r}   r/   zone.1)r/   zone.2r0   r~   r   z%A,A,A,B
one,one,one.1,two
0,40,34,0.1)r   r   r/   zone.1.1r   z/A,A,A,B,B
one,one,one.1,two,two
0,40,34,0.1,0.1)r   r   r   r   r   )r   r   r   r   )r0   ztwo.1c                 p    | }|j                  t        |      ddg      }t        j                  ||       y )Nr   r5   r   rL   )r   r!   rF   r   rE   s        r   test_mangles_multi_indexr     s4    R F__Xd^QF_;F&(+rH   rA    Unnamed
NotUnnamedc                    | }ddg}|dj                  |xs ddg      dz   }ndj                  dg|xs ddgz         dz   }|j                  t        |      ||      }g }|g d}t        |      D ]%  \  }}	|	sd	||n|dz    d
}	|j	                  |	       ' t        j                  t        |ddg            }t        ddgddgg|      }
t        j                  ||
       y )Nr   r5   ,r   z
0,1
2,3
4,5
z
,0,1
0,2,3
1,4,5
rJ   )r   r   r   z	Unnamed: _level_001r$   r6   r%   r7   r   )joinr   r   	enumerateappendr   r   zipr   rC   rD   )r   rK   rA   r   r   r!   rE   exp_columnsrg   colrF   s              r   test_multi_index_unnamedr     s	    FVFxx+B8,/BBxx 3B8458PP__Xd^Fi_PFKG$  39#4a!a%@IC3	  $$SsCj%ABG1a&1a&)7;H&(+rH   c                     | }d}|j                  t        |      dg d      }t        ddgddgd	d
gd      }t        j                  ||       y )Nza, b
1,2,3
5,6,4
r   r.   )r   r3   r5   r7   r$   r8   r6   r%   rB   r   r   r!   rE   rF   s        r   6test_names_longer_than_header_but_equal_with_data_rowsr   E  sW     FD __Xd^A__MF1vQF!Q@AH&(+rH   c                 N   | }d}d}t        j                  g d      }t        g dg dg|      }|j                  t	        |      ddg	      }t        j                  ||j                  d d        |j                  t	        |      ddg	      }t        j                  ||       y )
NzFMale, Male, Male, Female, Female
R, R, L, R, R
.86, .67, .88, .78, .81z^Male, Male, Male, Female, Female
R, R, L, R, R
.86, .67, .88, .78, .81
.86, .67, .88, .78, .82))MalerS   ) Male R)r   z L) Femaler   )r   z R.1)Q?q=
ףp?)\(?(\?gQ?)r   r   r   r   g=
ףp=?r   r   r5   r   )r   r   r   r   r   rC   rD   iloc)r   r   s1s2mirF   df1df2s           r    test_read_csv_multiindex_columnsr   R  s     F	SB	"  
			

B 	')GHRTH //(2,1v/
6C#x}}Ra01
//(2,1v/
6C#x(rH   c                     | }d}t        j                  t        d      5  |j                  t	        |      ddg       d d d        y # 1 sw Y   y xY w)Nz1row11,row12,row13
row21,row22, row23
row31,row32
z1Header rows must have an equal number of columns.r   r   r$   r   r   r   r   r   r   )r   r   cases      r   'test_read_csv_multi_header_length_checkr   r  sS     FD
 
N
 7 	1v67 7 7s   AAc                     | }d}|j                  t        |      ddgd       }t        g ddt        j                  t        j                  gdg d	      }t        j                  ||       y )
Nzx,1,5
y,2
z,3
rq   rs   r   r4   r7   rq   rs   )xyz)r@   )r   r   r   r   nanrC   rD   r   s        r   #test_header_none_and_implicit_indexr     s_     FD__Xd^C:d_KFq"&&"&&12/H &(+rH   c                     | }d}t        j                  t        d      5  |j                  t	        |      ddgd        d d d        y # 1 sw Y   y xY w)Nx,1
y,2,5
z,3
z"Expected 2 fields in line 2, saw 3r   rq   rs   r   r   r    s      r   1test_header_none_and_implicit_index_in_second_rowr     sP     FD	{*N	O GsCjFG G Gr   c                     | }d}|j                  t        |      ddgd d      }t        ddgdd	gd
      }t        j                  ||       y )Nr   rq   rs   skip)r3   r   on_bad_linesr   r   r5   r6   r   rB   r   s        r   &test_header_none_and_on_bad_lines_skipr     sY    FD__sCjF  F SzA78H&(+rH   c                     | }d}d}t        j                  t        |      5  |j                  t	        |      g d       d d d        y # 1 sw Y   y xY w)Nza,b
1,2
z;Passed header=\[0,1,2\], len of 3, but only 2 lines in filer   )r   r5   r$   r   r   )r   r   r!   r   s       r   test_header_missing_rowsr     sO     FD IC	z	- :y9: : :s   A

Ac                     | }d}|j                  t        |      d      }t        dgddd      }t        j                  ||       y )Nz1aa    bb(1,1)   cc(1,1)
                0  2  3.5z\s+)sepr   r$   g      @)aazbb(1,1)zcc(1,1)rB   r   s        r    test_header_multiple_whitespacesr     sK     FD __Xd^_8FcBCH&(+rH   c                     | }d}d}t        j                  t        |d      5  |j                  t	        |      d      }d d d        t        ddd	gi      }t        j                  |       y # 1 sw Y   /xY w)
Nza,b
1,2
3,4
    z;The 'delim_whitespace' keyword in pd.read_csv is deprecatedF)r   check_stacklevelT)delim_whitespaceza,bz1,2z3,4)rC   assert_produces_warningFutureWarningr   r   r   rD   )r   r   r!   depr_msgrE   rF   s         r   test_header_delim_whitespacer    s     FD
 MH		#	#X
 H $$GH %%01H&(+H Hs   A..A7c                     | }d}|j                  t        |      d ddgddd      }t        ddgd	d
ggd      }t        j                  ||       y )Nz
a,i,x
b,j,y
r   r5   zstring[pyarrow]r   )r   ri   r   dtype_backendr   rq   rg   rs   jr   rB   )pyarrow_parser_onlyr   r!   rE   rF   s        r   test_usecols_no_header_pyarrowr    si     FD __A  F 3*sCj19JKH&(+rH   ):__doc__collectionsr   ior   numpyr   r   pandas.errorsr   pandasr   r   r   pandas._testing_testingrC   r   filterwarnings
pytestmarkusefixturesxfail_pyarrowskip_pyarrowr   r"   parametrizer(   r+   rG   rN   rh   rl   rm   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  rT   rH   r   <module>r     sc  
 #    % 
 [[''C
 ''8{{&&~6 ( (3 ZB7#<=
7 >
7 D%=17 27 , ,",( ", ",J  5%.)'	
 a&E5>:H	

 a&eU^<J	
*G+*G( w&9:
 	Aq6
	
 3$3$3$3$3$3$
	
8,9 :,& 	Aq6
	
 3$3$3$3$3$3$
	
8,9 :,$ 	Aq6
	
 3$3$3$3$3$3$
	
8,9 :,& , ,( , ,* , ,( , , *,LM,, Be(<#=>, ? , 	_<=2	
	
,	
, S#J#@A7 B7 , ,  4!".
..P	
 6!".
..R	
 @&'.
..	
'"%L,M% N, taSk2I9b/i=VX, 3 
,D 	, 	, ) )> 7 7 , , G G, : : , , , ,",rH   