
    Owg*                     2   d Z ddlmZmZ ddlZddlZddlZddlZddl	Z	ddl
mZmZ ddlmZ e	j                   j#                  d      Ze	j                   j'                  d      Zd Zed        Zee	j                   j/                  d	d
dg      e	j                   j/                  dg d      d                      Zd Zd Ze	j                   j/                  ddi  eddgi      fdddi eddgi      fdddgi edddgi      fddgdd eddgi      fddgdd edej6                  dgi      fg      d         Zd! Ze	j                   j/                  d"g d#      d$        Ze	j                   j/                  d%ddg      d&        Zd' Z e	j                   j/                  dg d(      d)        Z!e	j                   j/                  dg d*      d+        Z"d, Z#d- Z$e	j                   j'                  d.      e	j                   j/                  d/d0d1g      d2               Z%y)3zZ
Tests encoding functionality during parsing
for all of the parsers defined in parsers.py
    )BytesIOTextIOWrapperN)	DataFrameread_csvz=ignore:Passing a BlockManager to DataFrame:DeprecationWarningpyarrow_skipc                     d}| }t        dj                  |            }|j                  |d|      }t        ddggddg	      }t	        j
                  ||       y )
Ncp1255u   שלום:1234
562:123:)sepencodingi2  {   u   שלום1234columnsr   encoder   r   tmassert_frame_equal)all_parsersr   parserdataresultexpecteds         [/var/www/horilla/myenv/lib/python3.12/site-packages/pandas/tests/io/parser/test_encoding.pytest_bytes_io_inputr      s]    HF+228<=D__TsX_>F3*
F/CDH&(+    c                     | }t        dj                               }|j                  |ddd       }t        ddgg      }t	        j
                  ||       y )Nu   Łaski, Jan;1;utf-8)r   r   headeru   Łaski, Jan   r   )r   r   r   r   r   s        r   test_read_csv_unicoder"   (   sT    F&--/0D__TsWT_JF,a012H&(+r   r   ,	r   )utf-16zutf-16lezutf-16bec                 8   | }dj                  d|      }dt        j                          d}|dd}d}t        j                  |      5 }|j                  |      }t        |d      5 }	|	j                  |       d d d        t        t        |j                  |            |	      5 }
 |j                  |fd
|i|} |j                  |
fd
|i|}d d d        t        j                         d d d        y # 1 sw Y   xY w# 1 sw Y   4xY w# 1 sw Y   y xY w)Nz)skip this
skip this too
A,B,C
1,2,3
4,5,6r#   __z__.csv   )r   skiprowsr   wbr   r   )replaceuuiduuid4r   ensure_cleanr   openwriter   r   r   r   )r   r   r   r   r   pathkwargsutf8
bytes_datafbytes_bufferr   r   s                r   test_utf16_bom_skiprowsr8   2   s'   
 F	 
S
	 	 

~V$Da(FD		 	0$[[*
$ 	 GGJ	  74;;t#45E 	N$V__TGHGGF&v|MdMfMH	N 	fh/	0 	0	  	 	N 	N	0 	0s<   	D'C89-D&+DD8D	=DD		DDc                     t         j                  j                  |d      }| }|j                  |dd      }t	        |      dk(  sJ y )Nzutf16_ex.txtr%   r$   )r   r   2   )osr2   joinr   len)r   csv_dir_pathr2   r   r   s        r   test_utf16_exampler?   O   sA    77<<n5DF__TH$_?Fv;"r   c                     t         j                  j                  |d      }| }|j                  |d d      }|j	                  d      }|d   d   }d}||k(  sJ y )Nunicode_series.csvlatin-1)r    r   r   r!   i`  u$   Á köldum klaka (Cold Fever) (1994))r;   r2   r<   r   	set_index)r   r>   r2   r   r   gotr   s          r   test_unicode_encodingrE   V   s^    77<<&:;DF__T$_CFa F
)D/C9H(??r   zdata,kwargs,expectedza
1ar!   z"a"
1	quotechar"zb
1namesb1
1T)rI   skip_blank_linesFc                    	 | }dd		fd}|j                   dk(  r-|dk(  r(|j                  dd      rt        j                  d	        |j                   ||      fd
	i|}t        j                  ||       y )Nu   ﻿r   c                 B    | z   j                        }t        |      S )N)r   r   )_databom_databomr4   s     r   _encode_data_with_bomz,test_utf8_bom.<locals>._encode_data_with_bom{   s"    %K''-x  r   pyarrowrL   rM   Tz,https://github.com/apache/arrow/issues/38676)reasonr   )enginegetpytestskipr   r   r   )
r   r   r3   r   requestr   rS   r   rR   r4   s
           @@r   test_utf8_bomr[   b   s|    * F
CD!
 	"EMJJ)40 	IJV__248R4R6RF&(+r   c                     t        dgdgd      }| }|j                  |      }dj                  |      }|j                  t	        |      |      }t        j                  ||       y )Ng333333@test)mb_num	multibytezmb_num,multibyte
4.8,testr+   )r   formatr   r   r   r   r   )r   	utf_valueencoding_fmtr   r   r   r   r   s           r   test_read_csv_utf_aliasesrc      s`    SEABHF""9-H'..x8D__WT]X_>F&(+r   zfile_path,encoding)))ior   csvz	test1.csvr   ))rd   r   r   rA   rB   ))rd   r   r   zsauron.SHIFT_JIS.csvshiftjisc                 R   | } || }|j                  ||      }t        ||      5 }|j                  |      }|j                  rJ 	 d d d        t        j                  |       t        |d      5 }	|j                  |	|      }|	j                  rJ 	 d d d        t        j                  ||       t        |dd      5 }	|j                  |	|      }|	j                  rJ 	 d d d        t        j                  ||       y # 1 sw Y   xY w# 1 sw Y   {xY w# 1 sw Y   8xY w)Nr+   rb)moder   )ri   	buffering)r   r0   closedr   r   )
r   	file_pathr   datapathr   fpathr   far   fbs
             r   test_binary_mode_file_buffersrq      s    Fi Eux8H	eh	' 2$99}9 (F+	e$	 2h799}9 (F+	e$!	, h799}9 (F+ 
 
 s#    D4"D"DDDD&pass_encodingc                    | }|j                  |      }|j                  dk(  r|du r|dv rt        j                  d       t	        ddgi      }t        j                  d|d      5 }|j                  d	       |j                  d
       |j                  ||r|nd       }t        j                  ||       d d d        y # 1 sw Y   y xY w)NrT   T)       zThese cases freezefoobarzw+)ri   r   return_filelikezfoo
barr   r+   )r`   rV   rX   rY   r   r   r/   r1   seekr   r   )	r   ra   rb   rr   r   r   r   r6   r   s	            r   test_encoding_temp_filerz      s     F""9-H}}	!mt&;	X@U()%%)*H	dXt	L 0PQ	
	q	XDQ
fh/0 0 0s   &AB??Cc                 ^   | }d}d}d}t        ||gi      }t        j                         5 }|j                  | d| j	                  |             |j                  d       |j                  ||      }t        j                  ||       |j                  rJ 	 d d d        y # 1 sw Y   y xY w)Nz	shift-jisu	   てすとu   こむ
r   r+   )
r   tempfileNamedTemporaryFiler1   r   ry   r   r   r   rk   )r   r   r   titler   r   r6   r   s           r   test_encoding_named_temp_filer      s    FHED%$)H		$	$	& !	5'D6"))(34	q	X6
fh/88|8  s   A.B##B,)r   r%   z	utf-16-bez	utf-16-lezutf-32c                     d}t        |j                  |             }t        |d|       }t        ddgddgdd	ggd
dg      }t	        j
                  ||       y )Nu   a	b
：foo	0
bar	1
baz	2r$   )	delimiterr   u   ：foor   rw   r!   bazr(   rF   rJ   )r   r   r   )r   r   encoded_datar   r   s        r   %test_parse_encoded_special_charactersr      sc     -D4;;x01LldXFFmeQZ%4c
H &(+r   )r   Nr%   r	   rB   c                    | }t        g dg dg dd      }t        j                         5 }|j                  |d|       |j                  dk(  rDd}t        j                  t        |	      5  |j                  ||d
       d d d        	 d d d        y |j                  ||d
      }d d d        t        j                  |       y # 1 sw Y   FxY w# 1 sw Y   ,xY w)N)Raphael	DonatellozMiguel AngelLeonardo)redpurpleorangeblue)saizbo staffnunchunkkatana)namemaskweaponF)indexr   rT   BThe 'memory_map' option is not supported with the 'pyarrow' enginematchT)r   
memory_map)
r   r   r/   to_csvrV   rX   raises
ValueErrorr   r   )r   r   r   r   filemsgdfs          r   test_encoding_memory_mapr      s     FH7?	
H 
	 	GdEH===I%VCz5 JxDIJ	G 	G __TH_F	G "h'J J	G 	Gs*   AC-C	CCC	CCc                    | }t        dgdz        }d|j                  d<   t        j                  d      5 }|j	                  |ddd	       |j
                  d
k(  rDd}t        j                  t        |      5  |j                  |dd       ddd       	 ddd       y|j                  |dd      }ddd       t        j                  |       y# 1 sw Y   FxY w# 1 sw Y   ,xY w)zO
    Chunk splits a multibyte character with memory_map=True

    GH 43540
    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaai   )r   u   aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaąi  zbug-gh43540.csvFr   r   r    r   rT   r   r   NT)r    r   )r   ilocr   r/   r   rV   rX   r   r   r   r   )r   r   r   fnamer   dfrs         r    test_chunk_splits_multibyte_charr     s     F	d*	+B %BGGDM	*	+ 	Cu
		%uUW	E==I%VCz5 EdtDE	C 	C ooeDToB	C #r"E E	C 	Cs*   AC9C	C CC	CC(c           	         g }d}d}d}t        t        |      t        |      |      D ]]  }dj                  t        ||dz         D cg c]  }t        |       c}      dz   }	 |j	                  d       |j                  |       _ | }t        |      }	t        j                  d      5 }
|	j                  |
ddd	       |j                  d
k(  rEd}t        j                  t        |      5  |j                  |
ddd       ddd       	 ddd       y|j                  |
ddd      }ddd       t        j                   |	       yc c}w # t
        $ r Y 1w xY w# 1 sw Y   \xY w# 1 sw Y   AxY w)zg
    GH 43787

    Test correct handling of UTF-8 chars when memory_map=True and encoding is UTF-8
        u   𐂀 r|   r   zutf8test.csvFr   rT   r   r   NT)r    r   r   )rangeordr<   chrr   UnicodeEncodeErrorappendr   r   r/   r   rV   rX   r   r   r   r   )r   linesline_length
start_charend_charlnumcliner   r   r   r   r   s                r   test_readcsv_memmap_utf8r   "  sq    EKJH c*os8}kB wwdD4K(@A1AABTI	KK  	T F	5	B		( 	UE
		%uUW	E==I%VCz5 WdtgVW	U 	U ooeDTGoT	U "c"% B " 		W W	U 	UsC   E
$E*AE),E	E)E)	EEE&	"E))E2pyarrow_xfailri   zw+bzw+tc                    | }d}d|v rd}t        j                  |d      5 }|j                  |       |j                  d       |j	                  |      }d d d        t        g dg      }t        j                  |       y # 1 sw Y   .xY w)Ns   abcdtabcdr   )ri   r   r   r   )r}   SpooledTemporaryFiler1   ry   r   r   r   r   )r   ri   r   contenthandler   r   s          r   test_not_readabler   D  s     FG
d{		&	&D7	C %vWA__V$% fX.H"h'% %s   4BB)&__doc__rd   r   r   r;   r}   r-   numpynprX   pandasr   r   pandas._testing_testingr   markfilterwarnings
pytestmarkusefixturesskip_pyarrowr   r"   parametrizer8   r?   rE   nanr[   rc   rq   rz   r   r   r   r   r   r    r   r   <module>r      s   
     [[''C
 {{&&~6, , , d,%GH0 I - 04	  
YaSz*+	K%y#s'<=	'C5!9cC:->#?@	3%T:IsQCj<QR e7sRVVQK()	
&,'&,,	, ,,0 4-80 90&( G,, %ST( U(.#4#D )%0( 1 *(r   