
    OwgD                    *   d dl mZ d dlmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlZd d	lmZmZ d d
lmZ d dlmZmZ d dlmZmZmZmZ d dlm Z m!Z!m"Z" erd dlm#Z# ejH                  ejJ                  ejL                  ejN                  ejP                  ejR                  ejR                  dZ*ejL                  ejV                  dfejR                  ejX                  e
fejH                  ejZ                  dfejJ                  ejZ                  dfejN                  ejZ                  dfej\                  ejX                  dfejP                  ej^                  d fiZ0ejZ                  dejV                  dejX                  diZ1 G d de      Z2y)    )annotations)TYPE_CHECKINGAnyN)infer_dtype)iNaT)NoBufferPresent)cache_readonly)BaseMaskedDtype)
ArrowDtypeDatetimeTZDtype)is_string_dtype)PandasBufferPandasBufferPyarrow)ColumnColumnBuffersColumnNullType	DtypeKind)ArrowCTypes
Endiannessdtype_to_arrow_c_fmt)Buffer)iufbUMmzThis column is non-nullablezThis column uses NaN as nullz!This column uses a sentinel valuec                      e Zd ZdZdddZddZedd       Zedd       Z	ddZ
ed        Zed        Zedd	       Zedd
       ZddZdddZddZ	 	 ddZddZddZy)PandasColumna  
    A column object, with only the methods and properties required by the
    interchange protocol defined.
    A column can contain one or more chunks. Each chunk can contain up to three
    buffers - a data buffer, a mask buffer (depending on null representation),
    and an offsets buffer (if variable-size binary; e.g., variable-length
    strings).
    Note: this Column object can only be produced by ``__dataframe__``, so
          doesn't need its own version or ``__column__`` protocol.
    c                    t        |t        j                        rt        d|j                   d      t        |t        j
                        st        dt        |       d      || _        || _	        y)zu
        Note: doesn't deal with extension arrays yet, just assume a regular
        Series/ndarray for now.
        zExpected a Series, got a DataFrame. This likely happened because you called __dataframe__ on a DataFrame which, after converting column names to string, resulted in duplicated names: zD. Please rename these columns before using the interchange protocol.zColumns of type  not handled yetN)

isinstancepd	DataFrame	TypeErrorcolumnsSeriesNotImplementedErrortype_col_allow_copy)selfcolumn
allow_copys      U/var/www/horilla/myenv/lib/python3.12/site-packages/pandas/core/interchange/column.py__init__zPandasColumn.__init__T   su    
 fbll+ !..) *22  &")),%(8fFV&WXX 	%    c                .    | j                   j                  S )z2
        Size of the column, in elements.
        )r,   sizer.   s    r1   r5   zPandasColumn.sizeh   s     yy~~r3   c                     y)z7
        Offset of first element. Always zero.
        r    r6   s    r1   offsetzPandasColumn.offsetn   s     r3   c                   | j                   j                  }t        |t        j                        rb| j                   j
                  j                  }| j                  |j                        \  }}}}t        j                  ||t        j                  fS t        |      rMt        | j                         dv r+t        j                  dt        |      t        j                  fS t!        d      | j                  |      S )N)stringempty   z.Non-string object dtypes are not supported yet)r,   dtyper$   r%   CategoricalDtypevaluescodes_dtype_from_pandasdtyper   CATEGORICALr   NATIVEr   r   STRINGr   r*   )r.   r>   rA   _bitwidthc_arrow_dtype_f_strs         r1   r>   zPandasColumn.dtypev   s    		eR001II$$**E ,,U[[9# %%#!!	  U#499%)<<$$(/%%	  &&VWW//66r3   c                   t         j                  |j                  d      }|t        d| d      t	        |t
              r|j                  j                  }nZt	        |t              r|j                  j                  }n3t	        |t              r|j                  j                  }n|j                  }|dk(  r||j                  t        j                  |fS ||j                  dz  t        |      |fS )z/
        See `self.dtype` for details.
        N
Data type z& not supported by interchange protocolzbool[pyarrow]r=   )	_NP_KINDSgetkind
ValueErrorr$   r   numpy_dtype	byteorderr   baser
   itemsizer   BOOLr   )r.   r>   rM   rP   s       r1   rB   z$PandasColumn._dtype_from_pandasdtype   s     }}UZZ.<z%0VWXXeZ())33I/

,,I/))33IIO#   	  U^^a')=e)DiOOr3   c                   | j                   d   t        j                  k(  st        d      | j                  j
                  j                  dt        t        j                  | j                  j
                  j                              dS )a:  
        If the dtype is categorical, there are two options:
        - There are only values in the data buffer.
        - There is a separate non-categorical Column encoding for categorical values.

        Raises TypeError if the dtype is not categorical

        Content of returned dict:
            - "is_ordered" : bool, whether the ordering of dictionary indices is
                             semantically meaningful.
            - "is_dictionary" : bool, whether a dictionary-style mapping of
                                categorical values to other objects exists
            - "categories" : Column representing the (implicit) mapping of indices to
                             category values (e.g. an array of cat1, cat2, ...).
                             None if not a dictionary-style categorical.
        r   zCdescribe_categorical only works on a column with categorical dtype!T)
is_orderedis_dictionary
categories)r>   r   rC   r'   r,   catorderedr!   r%   r)   rW   r6   s    r1   describe_categoricalz!PandasColumn.describe_categorical   si    $ zz!}	 5 55U 
 ))--//!&ryy1I1I'JK
 	
r3   c                   t        | j                  j                  t              rt        j
                  }d}||fS t        | j                  j                  t              rb| j                  j                  j                  j                  d   j                         d   t        j                  d fS t        j                  dfS | j                  d   }	 t        |   \  }}||fS # t        $ r t        d| d      w xY w)N   r   rJ   z not yet supported)r$   r,   r>   r
   r   USE_BYTEMASKr   array	_pa_arraychunksbuffersNON_NULLABLEUSE_BITMASK_NULL_DESCRIPTIONKeyErrorr*   )r.   column_null_dtype
null_valuerM   nullvalues         r1   describe_nullzPandasColumn.describe_null   s    diioo7 . ; ;J$j00diiooz2 yy((//2::<Q?G%22D88!--q00zz!}	M+D1KD% U{  	M%
4&8J&KLL	Ms   C! !C:c                n    | j                   j                         j                         j                         S )zB
        Number of null elements. Should always be known.
        )r,   isnasumitemr6   s    r1   
null_countzPandasColumn.null_count   s'    
 yy~~##%**,,r3   c                2    d| j                   j                  iS )z8
        Store specific metadata of the column.
        zpandas.index)r,   indexr6   s    r1   metadatazPandasColumn.metadata   s    
 		00r3   c                     y)zE
        Return the number of chunks the column consists of.
        r\   r8   r6   s    r1   
num_chunkszPandasColumn.num_chunks   s     r3   Nc              #     K   |rt|dkD  rot        | j                        }||z  }||z  dk7  r|dz  }t        d||z  |      D ]4  }t        | j                  j                  |||z    | j
                         6 y|  yw)zy
        Return an iterator yielding the chunks.
        See `DataFrame.get_chunks` for details on ``n_chunks``.
        r\   r   N)lenr,   ranger!   ilocr-   )r.   n_chunksr5   stepstarts        r1   
get_chunkszPandasColumn.get_chunks   s     
 1tyy>D8#Dh!#	q$/48 "IINN554<8$:J:J 
 Js   A<A>c                    | j                         ddd}	 | j                         |d<   	 | j                         |d<   |S # t        $ r Y !w xY w# t        $ r Y |S w xY w)a`  
        Return a dictionary containing the underlying buffers.
        The returned dictionary has the following contents:
            - "data": a two-element tuple whose first element is a buffer
                      containing the data and whose second element is the data
                      buffer's associated dtype.
            - "validity": a two-element tuple whose first element is a buffer
                          containing mask values indicating missing data and
                          whose second element is the mask value buffer's
                          associated dtype. None if the null representation is
                          not a bit or byte mask.
            - "offsets": a two-element tuple whose first element is a buffer
                         containing the offset values for variable-size binary
                         data (e.g., variable-length strings) and whose second
                         element is the offsets buffer's associated dtype. None
                         if the data buffer does not have an associated offsets
                         buffer.
        N)datavalidityoffsetsr   r   )_get_data_buffer_get_validity_bufferr   _get_offsets_buffer)r.   ra   s     r1   get_bufferszPandasColumn.get_buffers  s    ( ))+"
	"&";";"=GJ	!%!9!9!;GI   		
  		s    ? A 	A
A	AAc                   | j                   d   t        j                  t        j                  t        j                  t        j
                  t        j                  fv rQ| j                   }| j                   d   t        j                  k(  rOt        | j                   d         dkD  r4| j                  j                  j                  d      j                         }n| j                  j                  }t        | j                  j                   t              r|j                  }ntt        | j                  j                   t               rD|j"                  j$                  d   }t'        |j)                         d   t        |            }||fS |j*                  }t-        || j.                        }||fS | j                   d   t        j0                  k(  rV| j                  j2                  j4                  }t-        || j.                        }| j7                  |j                         }||fS | j                   d   t        j8                  k(  r| j                  j                         }t;               }|D ]4  }t        |t<              s|j?                  |jA                  d	             6 t-        tC        jD                  |d
            }| j                   }||fS tG        d| j                  j                    d      )zZ
        Return the buffer containing the data and the buffer's associated dtype.
        r         Nr\   length)r0   utf-8encodinguint8)r>   rJ   r#   )$r>   r   INTUINTFLOATrS   DATETIMErv   r,   dt
tz_convertto_numpyr^   r$   r
   _datar   r_   r`   r   ra   _ndarrayr   r-   rC   r@   _codesrB   rE   	bytearraystrextendencodenp
frombufferr*   )	r.   r>   np_arrarrbufferrA   bufr   objs	            r1   r   zPandasColumn._get_data_buffer0  sF    ::a=MMNNOONN
 
 JJEzz!}	 2 22s4::a=7IA7M006??Aiioodiioo? YYF		< --..q1C0a("3xF "5=( \\F!&T5E5EFF4 u}3 ZZ]i333II$$++E!%D4D4DEF00=E, u}+ ZZ]i...))$$&CA  ;c3'HHSZZZ9:; ""--"ABF
 JJE u} &
499??2CCS&TUUr3   c                   | j                   \  }}t        | j                  j                  t              r| j                  j
                  j                  j                  d   }t        j                  dt        j                  t        j                  f}|j                         d   yt        |j                         d   t        |            }||fS t        | j                  j                  t               r_| j                  j
                  j"                  }t%        |      }t        j                  dt        j                  t        j                  f}||fS | j                  d   t        j&                  k(  r| j                  j)                         }|dk(  }| }t+        j,                  t        |      ft*        j.                        }t1        |      D ]  \  }	}
t        |
t2              r|n|||	<    t%        |      }t        j                  dt        j                  t        j                  f}||fS 	 t4        |    d}t;        |      # t6        $ r t9        d      w xY w)	z
        Return the buffer containing the mask values indicating missing data and
        the buffer's associated dtype.
        Raises NoBufferPresent if null representation is not a bit or byte mask.
        r   r\   Nr   r=   shaper>   z! so does not have a separate maskzSee self.describe_null)rj   r$   r,   r>   r   r^   r_   r`   r   rS   r   r   rD   ra   r   rv   r
   _maskr   rE   r   r   zerosbool_	enumerater   _NO_VALIDITY_BUFFERre   r*   r   )r.   rh   invalidr   r>   r   maskr   validr   r   msgs               r1   r   z!PandasColumn._get_validity_buffern  s    **gdiiooz2 ))//++2215C^^Q(8(8*:K:KLE{{}Q'(a 3xF 5= diioo799??((D!$'F^^Q(8(8*:K:KLE5= ::a=I,,, ))$$&C qLEiG883s8+RXX>D#C. E3#-c3#7%WQE
 "$'F ^^Q(8(8*:K:KLE5= 	@(.//PQC
 c""	  	@%&>??	@s   .I Ic                   | j                   d   t        j                  k(  r| j                  j	                         }d}t        j                  t        |      dz   ft
        j                        }t        |      D ]=  \  }}t        |t              r |j                  d      }|t        |      z  }|||dz   <   ? t        |      }t        j                  dt        j                   t"        j$                  f}||fS t'        d      )a  
        Return the buffer containing the offset values for variable-size binary
        data (e.g., variable-length strings) and the buffer's associated dtype.
        Raises NoBufferPresent if the data buffer does not have an associated
        offsets buffer.
        r   r\   r   r   r   @   zJThis column has a fixed-length dtype so it does not have an offsets buffer)r>   r   rE   r,   r   r   r   rv   int64r   r$   r   r   r   r   r   INT64r   rD   r   )	r.   r@   ptrr   r   vr   r   r>   s	            r1   r   z PandasColumn._get_offsets_buffer  s     ::a=I,,,YY'')FChhc&kAo%7rxxHG!&) %1 a%'2A3q6MC!$A% "'*F !!!!	E u} "5 r3   )T)r/   z	pd.Seriesr0   boolreturnNone)r   int)r   ztuple[DtypeKind, int, str, str])r   zdict[str, pd.Index])N)ry   z
int | None)r   r   )r   z.tuple[Buffer, tuple[DtypeKind, int, str, str]])r   ztuple[Buffer, Any] | None)r   ztuple[PandasBuffer, Any])__name__
__module____qualname____doc__r2   r5   propertyr9   r	   r>   rB   rZ   rj   ro   rr   rt   r|   r   r   r   r   r8   r3   r1   r!   r!   H   s    	&(   7 7:PB 
 
8  & - - 1 1"#J<	7<|7#r&r3   r!   )3
__future__r   typingr   r   numpyr   pandas._libs.libr   pandas._libs.tslibsr   pandas.errorsr   pandas.util._decoratorsr	   pandas.core.dtypes.dtypesr
   pandasr%   r   r   pandas.api.typesr   pandas.core.interchange.bufferr   r   *pandas.core.interchange.dataframe_protocolr   r   r   r   pandas.core.interchange.utilsr   r   r   r   r   r   r   rS   rE   r   rK   USE_NANUSE_SENTINELrb   rC   r]   rd   r   r!   r8   r3   r1   <module>r      si   "
  ( $ ) 2 5  -   A 
													 OOn,,d344d;MMN//6NN^00$7NN^00$7 N77<~22A6  !>:!D E6 Er3   