
    KwgG              	          d Z dZ	 dZddlZd Z ej
                  ddddedd      Zdddi fdZ G d d	      Z	 e	       Z	d
Z
dZdZdZ ee      ZdZg dZd ZddefdZereZd ZdZd ZddefdZd ZddZddZedk(  r ed       ddlZddlZddlmZ ddlZ e       Z dZ!ejD                  dd D ]j  Z#ejH                  jK                  e#      s	 e&d       ' e'e#      5 Z(e(jS                         Z*ddd        e&de#d ee*               ee*d       e!dz  Z!l e!r e&d e       e z
  dd       yyy# e$ r dZY w xY w# 1 sw Y   VxY w) at  Very simple and fast XML parser, used for intra-paragraph text.

Devised by Aaron Watters in the bad old days before Python had fast
parsers available.  Constructs the lightest possible in-memory
representation; parses most files we have seen in pure python very
quickly.

The output structure is the same as the one produced by pyRXP,
our validating C-based parser, which was written later.  It will
use pyRXP if available.

This is used to parse intra-paragraph markup.

Example parse::

    <this type="xml">text <b>in</b> xml</this>

    ( "this",
      {"type": "xml"},
      [ "text ",
        ("b", None, ["in"], None),
        " xml"
        ]
       None )

    { 0: "this"
      "type": "xml"
      1: ["text ",
          {0: "b", 1:["in"]},
          " xml"]
    }

Ie, xml tag translates to a tuple:
 (name, dictofattributes, contentlist, miscellaneousinfo)

where miscellaneousinfo can be anything, (but defaults to None)
(with the intention of adding, eg, line number information)

special cases: name of "" means "top level, no containing tag".
Top level parse always looks like this::

    ("", list, None, None)

 contained text of None means <simple_tag/>

In order to support stuff like::

    <this></this><one></one>

AT THE MOMENT &amp; ETCETERA ARE IGNORED. THEY MUST BE PROCESSED
IN A POST-PROCESSING STEP.

PROLOGUES ARE NOT UNDERSTOOD.  OTHER STUFF IS PROBABLY MISSING.
    Nc                     t        |        y )N)print)ss    N/var/www/horilla/myenv/lib/python3.12/site-packages/reportlab/lib/rparsexml.pywarnCBr   >   s	    a       zstring input)ErrorOnValidityErrorsNoNoDTDWarningExpandCharacterEntitiesExpandGeneralEntitiesr   srcName
ReturnUTF8c                 b    |t         _        t        j                  | fi |}|xr |xs dd |gd fS )N )pyRXP_parsereoCBparse)xmlTextoneOutermostTagr   entityReplacer	parseOptsps         r   parsexmlr   I   s:     w33$1:D!T(::r   c                       e Zd Zed        Zy)smartDecodec                 b    dd l fd}t        |      t        j                  _         ||       S )Nr   c                 r    t        | t              r| S j                  |       }| j                  |d         S )Nencoding)
isinstancestrdetectdecode)r   cddchardets     r   __call__z&smartDecode.__call__.<locals>.__call__U   s2    !C (..#C88C
O,,r   )r%   staticmethodr   	__class__r&   )r   r&   r%   s     @r   r&   zsmartDecode.__call__Q   s,     		-
 *6h)?&r   N)__name__
__module____qualname__r'   r&    r   r   r   r   P   s    	 	r   r   r   	<![CDATA[z]]>))z&lt;<)z&gt;>)z&amp;&c                     g }| D ]7  }d|v r t         D ]  \  }}|j                  ||      } |j                  |       9 |S )Nr0   )replacelistreplaceappend)contentListresulteoldnews        r   unEscapeContentListr:   f   sS    F !8) (
cIIc3'(a	
 Mr   c                 \    t         rt        d      t        | |      \  }}|r|d   d   S |S )z.official interface: discard unused cursor infoz)pyRXP not found, fallback parser disabled)r      r   )RequirePyRXPImportError	parsexml0)xmltextr   r   r   r6   cursors         r   parsexmlSimplerB   o   s8    EFF GVVay|r   c                 L    t        | d      j                         }t        |      S )Nr)openreadr   )filenameraws     r   	parseFilerI   |   s!    
x

"
"
$CC=r   c                     d}d}|r| j                  d|      }|dk  r	 |S |dz   }d}|D ]E  }t        |      }| |||z    |k(  sd}| j                  d|      }|dk  rt        d|z        |dz   }G |d}|r|S )zBskip any prologue found after cursor, return index of rest of text)z!DOCTYPEz?xmlz!--Nr.   r   r	   r/   zcan't close prologue %r)findlen
ValueError)	textrA   prologue_elementsdoneopenbracketpastfoundr7   les	            r   skip_prologuerU      s     4D
,iiV,q=% M 1}" 	"AQBDb!1$3-!8$%>%BCC	" =D ," Mr   c                    t        |       } t        }dx}x}}|| j                         } |}| j                  d|      }	| |	dz   |	dz    }
d}|	dk  r@|!| |d g}|r ||      }||||ft	        |       fS t        dt        | ||dz          z        g }|t        x}}t        | |      }nU|	dk  rt        dt        | dd	       z        |
d
k(  rj| |	|	dz    dk(  r_|	dz   }| j                  t        |      }|dk  rt        dt        | ||dz          z        t        }| || g}|t	        t              z   }d}n|
dk(  rs| |	|	dz    dk(  rh| j                  d|	dz         }||	k  rt        dt        | ||dz          z        |dz   }| |   dk7  rt        dt        | ||dz          z        d|dz   fS | j                  d|	      }|dk  }|dz   }|	dz   }| || }d|vr%|d   dk(  r|dd }d}|j                         }|}|}n d|v rpd}|s t	        |dz   j                  d            dz  rd}|H| j                  d|      }|dz   }|dk  }| || }|s t	        |dz   j                  d            dz  rd}|H|rt        dt        | |	|	dz          z        |}| |dz
     dk(  r|dz
  }|dd }d}|j                         }|j                  d      }|d   }|j                         }|d   }|}|d   }|d   dz   |d<   i x}}d}t	        |      }||k  r||   }|dz   }|j                         }|d   dk7  rt        dt        |      z         d|dd vr5||kD  rt        dt        |      z         ||   } |dz   }|d| }d|dd vr5|j                         }|j                         }!|!d   }"|dt	        |"        }#|#j                         }#	 |#d   }$|#d   }%|$|%cxk(  rdk(  sn |$|%cxk(  rd k(  rn n|#dd }#|#||<   |"}||k  r||}|| j                  d|      }&|&|k  rQt        k(  r(d}| |d }'t	        |       }|'rj|j                  |'       nWt        d!|d"t        | ||dz                | |&dz      dk(  r| j                  d|&      }(|(|&k  rt        d#t        | |&|&dz          z        | |&dz   |( })|)j                         }*|*d   }+|+k7  ry| d| },t	        |,j                  d$            }-| d| },t	        |,j                  d$            }.t        d%|.d&|-d't        |      d&t        |+      d(t        | ||d)z          
      | ||& }'|'r|j                  |'       |(dz   }d}n=| ||& }'|'r|j                  |'       t        | |&d|*      \  }/}|/r|j                  |/       ||r
|r ||      }||||f}0|0|fS #  t        dt        |#||!f      z         xY w)+zsimple recursive descent xml parser...
       return (dictionary, endcharacter)
       special case: comment returns (None, endcharacter)Nr.   r	      r   zno tags at non-toplevel %s   z.non top level entry should be at start tag: %s
   z![	   r-   zunclosed CDATA %sz!-   z<!--z--zunterminated comment %sr<   r/   z*invalid comment: contains double dashes %s=/".zunclosed start tag %sz fz-attribute value must start with double quoteszunclosed value zattvalue,attentry,attlist='zno close bracket for z found after zunclosed close tag %s
z	at lines z...z close tag name doesn't match  d   )
startingattoplevelr   )r   NONAMEstriprK   rL   rM   reprrU   CDATAENDMARKERCDATAMARKERsplitr4   r?   )1r@   re   rf   r   
NameStringContentListAttDict
ExtraStuffrA   firstbracketafterbracket2char
docontentsLname
startcdataendcdataendcommentdashes
endcommentclosebracketnoclosestartsearchpastfirstbracket
tagcontentstoptaglisttaglist0taglist0listattributenameDtaglistindexlasttaglistindexattentrynextattentryattlistnextattnameattvaluefirstlastnextopenbracket	remaindernextclosebracketclosetagcontentsclosetaglist	closenameprefix
endlinenumlinenum	parsetreets1                                                    r   r?   r?      s    '"G J)--K-'J --/F<<V,LQ|A~>
 JA~ #&vw/0!1L;"G[*Es7|SS !=WVTZ[]T]E^@_!_``
A
  &&J"7F3F	aMPTU\]`^`UaPbbcc	D	 W\,q.%I;%V &aJ||NJ?Hz !4tGF6RT9<U7V!VWW$J":x89Kc.11FJ	D	 W\,q.%I6%Q&||D,q.A, !:T'&QWXZQZB[=\!\]])!+Jz"C' !MPTU\]cdjkmdmUnPo!opp*Q,''
 #<<\:L"1nG&q.K+A~ !1,?J*$b>3& ",CRJ!%J!'')!
$*$D#z#~&<&<S&A"BA"E,'.||C'E&21n".q.%,-=l%K
"c:c>*@*@*E&F&I!"D , $%<tGLYefhYhDi?j%jkk$ <>*C/#/>L!+CRJ!%J'--/
$**3/ #1:'~~/ $A!
 ,R 0%bk$.  ! #&w< "#33&|4H#/>L'~~/H{C'()X[_`h[i)ijjXab\1'(88",->h-O"PP'.|'<'3A~.6#E Xab\1  (~~/H&nn.G")"+K'(:#k*:):;H'~~/Hj ((2,T d'C'5$+;+;#+Ab>'/Am$$/M7 ##33: 

 %ll37Ov%<#J ' 0I \F +$QUVZ[bcikqrtkt[uVv%wxx*+S0#*<<_#E #O3$%<tGO]lmo]oDp?q%qrr#*?1+<>N#O /557
 )O	?$Wf-F!$V\\$%7!8J$[j1F!&,,t"45G$
DJYgV\^deh^hNiIj&l m m#F?;	HHY')!+!
 $F?;	HHY' '0O^b  tB  'C#FHHY'i 
 t )D;	Wk:6Av;Qj()EdHV^_fKgFh)hiis   
V? ?Wc           	         t        | t        t        f      r| S | \  }}}}|si }g }|j                         D ]&  }||   }|j	                  |dt        |             ( dj                  |      }|s|rt        d      |bt        t        t        |            }	dj                  |	      }
|s|
S |
j                  d      }ddj                  |      z   }
d|d|d|
d	|d
	S d|d|dS )z!pretty printer mainly for testingr\   rc   zname missing with attributes???rb   z   z
   r.   z>
z
</r/   z/>)r    r!   byteskeysr4   ri   joinrM   listmappprettyprintrl   )	parsedxmlru   attdicttextlistextrar   kv
attributestextlistpprint
textpprintnllists               r   r   r     s    )SK('0$T7HeBGG\\^ /AJ!T!W-./ '"JJ:;;c,9:YY~.
!!$'W\\&12
'+ZTJJ  z**r   c                     ddl m } ddlm}  |       }t        | d      }t        d |       |z
         |dz  r ||       |dz  r"t        d       t	        |      }t        |       y y )	Nr   time)pprintr	   )r   DONEr[   z============== reformatting)r   r   rB   r   r   )r   dumpr   r   nowr   r   s          r   	testparser     s^    
&Cq+A	&$&*Avq	Av+,Oa r   c                     t        d|        y )Na-  <this type="xml">text &lt;&gt;<b>in</b> <funnytag foo="bar"/> xml</this>
                 <!-- comment -->
                 <![CDATA[
                 <this type="xml">text <b>in</b> xml</this> ]]>
                 <tag with="<brackets in values>">just testing brackets feature</tag>
                 r   )r   r   s    r   testr     s     
  r   __main__r   r   z!!!!! no file at {f!r}zparsing z |t|=z	timed at z.2fz secs.)r   )+__doc__r=   simpleparsepyRXPUr   Parserr   r   r>   r   rg   NAMEKEYCONTENTSKEYrk   rL   LENCDATAMARKERrj   r2   r:   rB   rI   verboserU   r?   r   r   r   r)   sysosr   	reportlabr   seenargvfpathisfiler   rE   _frF   r   r,   r   r   <module>r      s  5n K 6==./'(01./!' .%&L +,TTV ;  m	
[!< -.4GZ  H 0 #$a*fP+4  ZaL
&CDXXab\ ww~~a *+a BGGIHQEs1vh/0aQAID 	$&*S)01 ! K  K` s   *D> &E>E	E	E	