
    Owg;              	      H   d Z ddlmZ ddlmZ ddlmZ ddlZddlmZ ddl	Z	ddl
ZddlmZ ddlmZ ddlZdd	lmZ dd
lmZ erddlmZmZmZmZ dZdZdZdZg dZdZ dZ!dZ"dZ#de  de" de! de# d	Z$de  de! dZ%dZ&d!dZ'd"dZ(d Z)d Z* G d d eejV                        Z,y)#a-  
Read a SAS XPort format file into a Pandas DataFrame.

Based on code from Jack Cushman (github.com/jcushman/xport).

The file format is defined here:

https://support.sas.com/content/dam/SAS/support/en/technical-papers/record-layout-of-a-sas-version-5-or-6-data-set-in-sas-transport-xport-format.pdf
    )annotations)abc)datetimeN)TYPE_CHECKING)Appender)find_stack_level)
get_handle)
ReaderBase)CompressionOptionsDatetimeNaTTypeFilePath
ReadBufferzPHEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000  zKHEADER RECORD*******MEMBER  HEADER RECORD!!!!!!!000000000000000001600000000zPHEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000  zPHEADER RECORD*******OBS     HEADER RECORD!!!!!!!000000000000000000000000000000  )ntypenhfunfield_lengthnvar0namelabelnformnflnum_decimalsnfjnfillniformniflnifdnpos_zParameters
----------
filepath_or_buffer : str or file-like object
    Path to SAS file or object implementing binary read method.zindex : identifier of index column
    Identifier of column that should be used as index of the DataFrame.
encoding : str
    Encoding for text data.
chunksize : int
    Read file `chunksize` lines at a time, returns iterator.zBformat : str
    File format, only `xport` is currently supported.z\iterator : bool, default False
    Return XportReader object for reading file incrementally.z#Read a SAS file into a DataFrame.


a  

Returns
-------
DataFrame or XportReader

Examples
--------
Read a SAS Xport file:

>>> df = pd.read_sas('filename.XPT')

Read a Xport file in 10,000 line chunks:

>>> itr = pd.read_sas('filename.XPT', chunksize=10000)
>>> for chunk in itr:
>>>     do_something(chunk)

z$Class for reading SAS Xport files.

z

Attributes
----------
member_info : list
    Contains information about the file
fields : list
    Contains information about the variables in the file
zRead observations from SAS Xport file, returning as data frame.

Parameters
----------
nrows : int
    Number of rows to read from data file; if None, read whole
    file.

Returns
-------
A DataFrame.
c                n    	 t        j                  | d      S # t        $ r t        j                  cY S w xY w)z1Given a date in xport format, return Python date.z%d%b%y:%H:%M:%S)r   strptime
ValueErrorpdNaT)datestrs    N/var/www/horilla/myenv/lib/python3.12/site-packages/pandas/io/sas/sas_xport.py_parse_dater'      s3      *;<< vvs    44c                d    i }d}|D ]#  \  }}| |||z    j                         ||<   ||z  }% |d= |S )a  
    Parameters
    ----------
    s: str
        Fixed-length string to split
    parts: list of (name, length) pairs
        Used to break up string, name '_' will be filtered from output.

    Returns
    -------
    Dict of name:contents of string at given location.
    r   r   )strip)spartsoutstartr   lengths         r&   _split_liner/      sU     CE feefn-335D	 	CJ    c                    |dk7  rit        j                  t        |       t        j                  d            }t        j                  d| dd|z
         }|j	                  |      }| |d<   |S | S )N   S8Sz,Sdtypef0)npzeroslenr6   view)vecnbytesvec1r6   vec2s        r&   _handle_truncated_float_vecr@      si     {xxC"((4.11VHBq6zl34yyuy%T
Jr0   c                   t        j                  d      }| j                  |      }|d   }|d   }|dz  }t        j                  t	        |       t         j
                        }d|t        j                  |dz        <   d|t        j                  |d	z        <   d
|t        j                  |dz        <   ||z  }||z	  |dz  dd
|z
  z   z  z  }|dz  }||dz	  dz  dz
  dz  |z   dz   dz  |dz  z  z  }t        j                  t	        |      fd      }||d<   ||d<   |j                  d      }|j                  d      }|S )zf
    Parse a vector of float values representing IBM 8 byte floats into
    native 8 byte floats.
    z>u4,>u4r5   r7   f1i    i       i  @    i         l          A   i     l        z>f8f8)	r8   r6   r;   r9   r:   uint8whereemptyastype)	r<   r6   r>   xport1xport2ieee1shiftieee2ieees	            r&   _parse_float_vecrW      sd   
 HHYE88%8 D$ZF$ZF ZE HHSXRXX.E+,E"((6J&
'(+,E"((6J&
'(+,E"((6J&
'( 
eOEu_&:"52U;K!LME 
ZE 
6R<4'2-!3u<tCJ E 88SZM3DDJDJ9959!D;;tDKr0   c                      e Zd ZeZ	 	 	 	 d	 	 	 	 	 	 	 	 	 ddZddZd ZddZddZ	ddZ
dddZd	 Z ee      ddd
       Zy)XportReaderNc                    || _         d| _        || _        || _        t	        |d|d|      | _        | j
                  j                  | _        	 | j                          y # t        $ r | j                           w xY w)Nr   rbF)encodingis_textcompression)	_encoding_lines_read_index
_chunksizer	   handleshandlefilepath_or_buffer_read_header	Exceptionclose)selfre   indexr\   	chunksizer^   s         r&   __init__zXportReader.__init__  s|     "#!#
 #',,"5"5	 	JJL	s   A A:c                8    | j                   j                          y N)rc   rh   ri   s    r&   rh   zXportReader.close  s    r0   c                T    | j                   j                  d      j                         S )NP   )re   readdecodero   s    r&   _get_rowzXportReader._get_row   s"    &&++B/6688r0   c           
        | j                   j                  d       | j                         }|t        k7  rd|v rt	        d      t	        d      | j                         }ddgddgd	dgd
dgddgg}t        ||      }|d   dk7  rt	        d      t        |d         |d<   || _        | j                         }t        |d d       |d<   | j                         }| j                         }|j                  t              }|t        k(  }	|r|	st	        d      t        |dd       }
ddgddgddgddgd	dgd
dgddgg}t        | j                         |      }ddgd
dgddgddgg}|j                  t        | j                         |             t        |d         |d<   t        |d         |d<   || _        ddd}t        | j                         dd       }|
|z  }|dz  r|d|dz  z
  z  }| j                   j                  |      }g }d}t        |      |
k\  r|d |
 ||
d  }}|j!                  d      }t#        j$                  d|      }t'        t)        t*        |            }|d
= ||d       |d <   |d!   }|d    dk(  r|d"k  s|dkD  rd#| d$}t-        |      |j/                         D ]  \  }}	 |j1                         ||<    ||d!   z  }||gz  }t        |      |
k\  r| j                         }|t4        k(  st	        d%      || _        || _        | j                   j;                         | _        | j?                         | _         | j6                  D cg c]  }|d&   jC                          c}| _"        tG        | j6                        D cg c]$  \  }}d'tI        |      z   d(tI        |d!         z   f& }}}tK        jL                  |      }|| _'        y # t2        $ r Y <w xY wc c}w c c}}w ))Nr   z**COMPRESSED**z<Header record indicates a CPORT file, which is not readable.z#Header record is not an XPORT file.prefixrH   versionr2   OSr   created   zSAS     SAS     SASLIBz!Header record has invalid prefix.modifiedzMember header not foundset_namesasdatar   (   typenumericchar)rC   rD   6   :   rq      z>hhhh8s40s8shhh2s8shhl52sr   r   rD   zFloating field width z is not between 2 and 8.zObservation header not found.r   r*   r4   )(re   seekrt   _correct_line1r"   r/   r'   	file_info
startswith_correct_header1_correct_header2intupdatemember_inforr   r:   ljuststructunpackdictzip
_fieldkeys	TypeErroritemsr)   AttributeError_correct_obs_headerfieldsrecord_lengthtellrecord_start_record_countnobsrs   columns	enumeratestrr8   r6   _dtype)ri   line1line2fifr   line3header1header2	headflag1	headflag2fieldnamelengthmemr   types
fieldcount
datalength	fielddatar   
obs_length
fieldbytesfieldstructfieldflmsgkvheaderxidtypelr6   s                                  r&   rf   zXportReader._read_header#  s[   $$Q' N"5( !R  BCC"~	1~ay3)iQS_Us+	X"::@AA*9Y+?@	)" +E#2J 7	* --/--/&&'78	//	i677gbn- qMONN1I"IO
 "$--/37B#rWbMFA;G;t}}<="-k*.E"FJ!,[-C!DI& &)B/0
$z1
?"zB..J++00<	
)n/ *?+/*+ "J $))#.J --(CZPKZ56Ec
"5>2E'N~&BW~*aR!V-bT1IJn$ 1 wwyE!H %//JugF7 )n/: ,,<==' 3388:&&(	48KK@q&	((*@
 &dkk2
5 3q6\3U>%:!;;<
 
  / &  A
s   .OO')O,	O$#O$c                B    | j                  | j                  xs d      S )NrC   nrows)rr   rb   ro   s    r&   __next__zXportReader.__next__  s    yyt3!y44r0   c                   | j                   j                  dd       | j                   j                         | j                  z
  }|dz  dk7  rt	        j
                  dt                      | j                  dkD  r4| j                   j                  | j                         || j                  z  S | j                   j                  dd       | j                   j                  d      }t        j                  |t        j                        }t        j                  |dk(        }t        |      dk(  rd}nd	t        |      z  }| j                   j                  | j                         ||z
  | j                  z  S )
z
        Get number of records in file.

        This is maybe suboptimal because we have to seek to the end of
        the file.

        Side effect: returns file position to record_start.
        r   rD   rq   zxport file may be corrupted.)
stacklevelir5   l     @@  r2   )re   r   r   r   warningswarnr   r   rr   r8   
frombufferuint64flatnonzeror:   )ri   total_records_lengthlast_card_bytes	last_cardixtail_pads         r&   r   zXportReader._record_count  s:    	$$Q*#66;;=@Q@QQ"$)MM.+-
 "##(():):;'4+=+===$$S!,1166r:MM/C	 ^^I)<<=r7a<H3r7{H$$T%6%67$x/D4F4FFFr0   c                B    || j                   }| j                  |      S )a  
        Reads lines from Xport file and returns as dataframe

        Parameters
        ----------
        size : int, defaults to None
            Number of lines to read.  If None, reads whole file.

        Returns
        -------
        DataFrame
        r   )rb   rr   )ri   sizes     r&   	get_chunkzXportReader.get_chunk  s#     <??Dyyty$$r0   c                    |j                  d      }|d   dk(  |d   dk(  z  |d   dk(  z  }|d   dk\  |d   d	k  z  |d   d
k(  z  |d   dk(  z  }||z  }|S )Nzu1,u1,u2,u4r5   rB   r   f2f3r7   rJ   Z   _   .   )r;   )ri   r<   r   missmiss1s        r&   _missing_doublezXportReader._missing_double  s    HH=H)$14A.!D'Q,?go!D'T/2w$ w$  	
 	r0   c                |   || j                   }t        || j                   | j                  z
        }|| j                  z  }|dk  r| j	                          t
        | j                  j                  |      }t        j                  || j                  |      }i }t        | j                        D ]  \  }}|dt        |      z      }	| j                  |   d   }
|
dk(  rLt        |	| j                  |   d         }	| j!                  |	      }t#        |	      }t        j$                  ||<   nf| j                  |   d   dk(  rQ|	D cg c]  }|j'                          }}| j(                  (|D cg c]  }|j+                  | j(                         }}|j-                  |i        t/        j0                  |      }| j2                  <t/        j4                  t7        | j                  | j                  |z               |_        n|j;                  | j2                        }| xj                  |z  c_        |S c c}w c c}w )Nr   )r6   countr*   r   r   r   r   )r   minr`   r   rh   StopIterationre   rr   r8   r   r   r   r   r   r   r@   r   rW   nanrstripr_   rs   r   r#   	DataFramera   Indexrangerj   	set_index)ri   r   
read_linesread_lenrawdatadf_datajr   r<   r   r   r   ydfs                  r&   rr   zXportReader.read  s   =IIE		D,<,< <=
 2 22q=JJL%%**84}}S:Fdll+ 	#DAqsSV|$CKKN7+E	!1#t{{1~n7UV++C0$S)&&$Q(F2),-AQXXZ-->>-;<=a$..1=A=NNAq6"	# \\'";;xxd&6&68H8H:8U VWBHdkk*BJ&	 . >s   H4,"H9)Nz
ISO-8859-1Ninfer)
re   zFilePath | ReadBuffer[bytes]r\   z
str | Nonerk   
int | Noner^   r   returnNone)r   r   )r   pd.DataFrame)r   r   rn   )r   r   r   r   )r   r   r   r   )__name__
__module____qualname___xport_reader_doc__doc__rl   rh   rt   rf   r   r   r   r   r   _read_method_docrr    r0   r&   rY   rY      s    G
 + $*18 	
  ( 
89l\5$GL%"	 %  %r0   rY   )r%   r   r   r   )r*   r   )-r   
__future__r   collectionsr   r   r   typingr   r   numpyr8   pandas.util._decoratorsr   pandas.util._exceptionsr   pandasr#   pandas.io.commonr	   pandas.io.sas.sasreaderr
   pandas._typingr   r   r   r   r   r   r   r   r   _base_params_doc_params2_doc_format_params_doc_iterator_doc_read_sas_docr   r   r'   r/   r@   rW   IteratorrY   r   r0   r&   <module>r     s   #        , 4  ' . ' 
 R ' 
' 
(C @9 A
      2    	  ,&6r~*cll ~r0   