
    Owg,                        d dl mZ d dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlZd dlmZ d dlmZmZ d dlmZ erd dlmZ  G d de      Zy)    )annotations)TYPE_CHECKINGN)using_pyarrow_string_dtype)lib)import_optional_dependency)ParserErrorParserWarning)find_stack_level)pandas_dtype)
is_integer)	DataFrame)_arrow_dtype_mappingarrow_string_types_mapper)
ParserBase)
ReadBufferc                  L     e Zd ZdZd fdZd	dZd	dZd
dZd	dZddZ	 xZ
S )ArrowParserWrapperz7
    Wrapper for the pyarrow engine for read_csv()
    c                `    t         |   |       || _        || _        | j	                          y N)super__init__kwdssrc_parse_kwds)selfr   r   	__class__s      ]/var/www/horilla/myenv/lib/python3.12/site-packages/pandas/io/parsers/arrow_parser_wrapper.pyr   zArrowParserWrapper.__init__%   s*    	    c                    | j                   j                  d      }|dn|| _        | j                   d   }t        |t              rt        d      t        | j                   d         | _        y)z?
        Validates keywords before passing to pyarrow.
        encodingNzutf-8	na_valuesz?The pyarrow engine doesn't support passing a dict for na_values)r   getr    
isinstancedict
ValueErrorlistr!   )r   r    r!   s      r   r   zArrowParserWrapper._parse_kwds,   sc      $yy}}Z8#+#3IIk*	i&Q  dii45r   c                0   ddddddd}|j                         D ]X  \  }}|| j                  v s| j                  j                  |      1| j                  j                  |      | j                  |<   Z | j                  }t        |t              r|g}nd}|| j                  d	<   | j                  j                         D ci c]  \  }}||d
v r|| c}}| _        | j                  j                  d      }|t        |      r|| j                  d<   n|t        j                  j                  k(  rd| j                  d<   n^|t        j                  j                  k(  rdd}|| j                  d<   n-|t        j                  j                  k(  rd | j                  d<   | j                  j                         D ci c]  \  }}||dv r|| c}}| _        d| j                  d   v | j                  d<   | j                  ;d| j                  v r-| j                  d   D 	cg c]  }	d|	 	 c}	| j                  d<   | j                  du | j                  | j                  n| j                  d   | j                   d| _        yc c}}w c c}}w c c}	w )z:
        Rename some arguments to pass to pyarrow
        include_columnsnull_valuesescape_charignore_empty_linesdecimal_point
quote_char)usecolsr!   
escapecharskip_blank_linesdecimal	quotecharNtimestamp_parsers)	delimiterr-   r*   r+   on_bad_linesinvalid_row_handlerc                    t        j                  d| j                   d| j                   d| j                   t
        t                      y)Nz	Expected z columns, but found z: )
stacklevelskip)warningswarnexpected_columnsactual_columnstextr	   r
   )invalid_rows    r   handle_warningz?ArrowParserWrapper._get_pyarrow_options.<locals>.handle_warningk   sL    MM#K$@$@#AAU&556b9I9I8JL%#3#5	 "r   c                     y)Nr9    )_s    r   <lambda>z9ArrowParserWrapper._get_pyarrow_options.<locals>.<lambda>v   s    r   )r(   r)   true_valuesfalse_valuesr,   r3    strings_can_be_nullfskiprows)autogenerate_column_names	skip_rowsr    )returnstr)itemsr   r"   popdate_formatr#   rN   parse_optionscallabler   BadLineHandleMethodERRORWARNSKIPconvert_optionsheaderr    read_options)
r   mappingpandas_namepyarrow_namerQ   option_nameoption_valuer5   r@   ns
             r   _get_pyarrow_optionsz'ArrowParserWrapper._get_pyarrow_options:   s   
 )&' 4&%
 *1 	E%Kdii'DIIMM+,F,R*.))--*D		,'	E &&k3'&-K K)4		%& .2YY__->
)\'OP %
 yy}}^4#%<H""#89!?!?!E!EE  "") !?!?!D!DD" =K""#89!?!?!D!DD<L""#89 .2YY__-> 
)\' % 
 79DIIm<T6T23;;#48L8L#L!%!5!56G!H7!A37D  !23
 *.)<{{& :&
i
> 
"7s   J?J$Jc                0   t        |j                        }d}| j                  | j                  | j                  t	        |      | _        t        | j                        |k7  r>t        t	        |t        | j                        z
              | j                  z   | _        d}| j                  |_        | j                  |j                  |      \  }}| j                  l| j                  j                         }t        | j                        D ]  \  }}t        |      r|j                  |   ||<   n||j                  vrt        d| d      | j                  N| j                  j                  |      || j                  j                  |      fn6|j                  |   | j                  j                  |j                  |         f\  }}	|	||   j                  |	      ||<   | j                  |=  |j                  |dd       | j                  4|s2dgt        |j                   j                        z  |j                   _        | j                  t#        | j                  t$              rK| j                  j'                         D 
ci c]  \  }
}|
|j                  v r|
t)        |      ! c}}
| _        nt)        | j                        | _        	 |j                  | j                        }|S |S c c}}
w # t*        $ r}t        |      d}~ww xY w)z
        Processes data read in based on kwargs.

        Parameters
        ----------
        frame: DataFrame
            The DataFrame to process.

        Returns
        -------
        DataFrame
            The processed DataFrame.
        TNFzIndex z invalid)dropinplace)lencolumnsrY   namesranger&   _do_date_conversions	index_colcopy	enumerater   r%   dtyper"   astype	set_indexindexr#   r$   rO   r   	TypeError)r   framenum_colsmulti_index_namedrC   index_to_setiitemkey	new_dtypekves                r   _finalize_pandas_outputz*ArrowParserWrapper._finalize_pandas_output   s    u}}% ;;zz!;;&!&xDJ4::(*
 "%3tzz?(B"CDtzzQ
$)! JJEM,,U]]EB5>>%>>..0L$T^^4 ,4d#&+mmD&9LO.$vdV8%<== ::)  ::>>$/; tzz~~d34#mmD14::>>%--PTBU3VW #C
 !,%*3Z%6%6y%Ac
 JJsO!,$ OOLtTOB{{"+<%)FS1B1B-C$C!::! $**d+ !%

 0 0 21EMM) |A&
 *$**5
$TZZ0 u  $ m#$s   $K6K< <	LLLc                    t        j                  |      rt        d |D              st        d      t	        |      rt        d      y )Nc              3  <   K   | ]  }t        |t                y wr   r#   rN   .0xs     r   	<genexpr>z7ArrowParserWrapper._validate_usecols.<locals>.<genexpr>   s     0UAs1C0U   zwThe pyarrow engine does not allow 'usecols' to be integer column positions. Pass a list of string column names instead.z=The pyarrow engine does not allow 'usecols' to be a callable.)r   is_list_likeallr%   rS   )r   r.   s     r   _validate_usecolsz$ArrowParserWrapper._validate_usecols   sO    G$S0UW0U-UP  gO  r   c           	        t        d      }t        d      }| j                          	  |j                  di | j                  }	 |j                  | j                   |j                  di | j                   |j                  di | j                   |      }| j&                  d	   }|t        j(                  u r|j*                  }	|j-                         }
t/        |j*                  j0                        D ]Q  \  }}|j0                  j3                  |      s"|	j                  ||	j5                  |      j7                  |
            }	S |j9                  |	      }|dk(  r!|j;                  t<        j>                  
      }n|dk(  rLtA               }t=        jB                         ||jE                         <   |j;                  |j
                  
      }n5tG               r|j;                  tI               
      }n|j;                         }| jK                  |      S # t        $ r | j                  j                  dd      }|| j                  |       | j                  j                  dt                     }t        j                  |      rt        d |D              st	        d       w xY w# |j"                  $ r}t%        |      |d}~ww xY w)z
        Reads the contents of a CSV file into a DataFrame and
        processes it according to the kwargs passed in the
        constructor.

        Returns
        -------
        DataFrame
            The DataFrame created from the CSV file.
        pyarrowzpyarrow.csvr(   Nr)   c              3  <   K   | ]  }t        |t                y wr   r   r   s     r   r   z*ArrowParserWrapper.read.<locals>.<genexpr>   s      6'(
1c"6r   z9The 'pyarrow' engine requires all na_values to be strings)rZ   rR   rX   dtype_backend)types_mappernumpy_nullablerB   )&r   ra   ConvertOptionsrX   rq   r"   r   setr   r   r   read_csvr   ReadOptionsrZ   ParseOptionsrR   ArrowInvalidr   r   
no_defaultschemafloat64rl   typesis_nullfield	with_typecast	to_pandaspd
ArrowDtyper   
Int64Dtypenullr   r   r}   )r   papyarrow_csvrX   includenullstabler|   r   
new_schemanew_typerv   
arrow_typerr   dtype_mappings                  r   readzArrowParserWrapper.read   s    (	20?!!#	8k88P4;O;OPO 	(((4[44It7H7HI6k66L9K9KL /	 ) E 		/2 CNN*Jzz|H!*5<<+=+=!> :88##J/!+:++A.88B"J JJz*EI%OOO?E.. 12M')}}M"'')$OO1B1BOCE')OO1J1LOME OO%E++E22k  	**../@$GG"&&w/((,,]CEBE##E*# 6,16 3  O  	,  	(a.a'	(s%   H AJ) BJ&)K	8KK	)r   zReadBuffer[bytes]rM   None)rM   r   )rr   r   rM   r   )rM   r   )__name__
__module____qualname____doc__r   r   ra   r}   r   r   __classcell__)r   s   @r   r   r       s+    6Y
vGR	F3r   r   )
__future__r   typingr   r:   pandas._configr   pandas._libsr   pandas.compat._optionalr   pandas.errorsr   r	   pandas.util._exceptionsr
   pandas.core.dtypes.commonr   pandas.core.dtypes.inferencer   pandasr   r   pandas.io._utilr   r   pandas.io.parsers.base_parserr   pandas._typingr   r   rB   r   r   <module>r      sK    "    5  > 5 2 3   5)O3 O3r   