
    OwgK:                         d Z dZdgZddlmZ ddlZddlZddlmZm	Z	m
Z
mZmZ ddlmZmZ ddlmZmZmZmZmZ d	Z G d
 dee      Z G d de      Zy)zCUse the HTMLParser library to parse HTML files that aren't too bad.MITHTMLParserTreeBuilder    )
HTMLParserN)CDataCommentDeclarationDoctypeProcessingInstruction)EntitySubstitutionUnicodeDammit)DetectsXMLParsedAsHTMLParserRejectedMarkupHTMLHTMLTreeBuilderSTRICTzhtml.parserc                   d    e Zd ZdZdZdZd Zd Zd ZddZ	ddZ
d	 Zd
 Zd Zd Zd Zd Zd Zy)BeautifulSoupHTMLParserzA subclass of the Python standard library's HTMLParser class, which
    listens for HTMLParser events and translates them into calls
    to Beautiful Soup's tree construction API.
    ignorereplacec                     |j                  d| j                        | _        t        j                  | g|i | g | _        | j                          y)a  Constructor.

        :param on_duplicate_attribute: A strategy for what to do if a
            tag includes the same attribute more than once. Accepted
            values are: REPLACE (replace earlier values with later
            ones, the default), IGNORE (keep the earliest value
            encountered), or a callable. A callable must take three
            arguments: the dictionary of attributes already processed,
            the name of the duplicate attribute, and the most recent value
            encountered.           
        on_duplicate_attributeN)popREPLACEr   r   __init__already_closed_empty_element_initialize_xml_detector)selfargskwargss      N/var/www/horilla/myenv/lib/python3.12/site-packages/bs4/builder/_htmlparser.pyr   z BeautifulSoupHTMLParser.__init__.   sN     '-jj$dll'
# 	D24262 -/)%%'    c                     t        |      )N)r   )r   messages     r    errorzBeautifulSoupHTMLParser.errorJ   s     #7++r!   c                 N    | j                  ||d      }| j                  |       y)zHandle an incoming empty-element tag.

        This is only called when the markup looks like <tag/>.

        :param name: Name of the tag.
        :param attrs: Dictionary of the tag's attributes.
        F)handle_empty_elementN)handle_starttaghandle_endtag)r   nameattrstags       r    handle_startendtagz*BeautifulSoupHTMLParser.handle_startendtagZ   s)     ""4U"K4 r!   c                    i }|D ]Q  \  }}|d}||v r=| j                   }|| j                  k(  rn&|d| j                  fv r|||<   n ||||       n|||<   d}S | j                         \  }	}
| j                  j                  |dd||	|
      }|r<|j                  r0|r.| j                  |d       | j                  j                  |       | j                  | j                  |       yy)a3  Handle an opening tag, e.g. '<tag>'

        :param name: Name of the tag.
        :param attrs: Dictionary of the tag's attributes.
        :param handle_empty_element: True if this tag is known to be
            an empty-element tag (i.e. there is not expected to be any
            closing tag).
        N z"")
sourceline	sourceposF)check_already_closed)r   IGNOREr   getpossoupr'   is_empty_elementr(   r   append	_root_tag_root_tag_encountered)r   r)   r*   r&   	attr_dictkeyvalueon_dupe	attrvaluer/   r0   r+   s               r    r'   z'BeautifulSoupHTMLParser.handle_starttagi   s    	 	JC }i 55dkk)t|| 44%*IcNIsE2!&	#I%	( !%
Iii''$iJ ( 
 3'',@ t%@ --44T:>>!&&t, "r!   c                     |r*|| j                   v r| j                   j                  |       y| j                  j                  |       y)zHandle a closing tag, e.g. '</tag>'
        
        :param name: A tag name.
        :param check_already_closed: True if this tag is expected to
           be the closing portion of an empty-element tag,
           e.g. '<tag></tag>'.
        N)r   remover4   r(   )r   r)   r1   s      r    r(   z%BeautifulSoupHTMLParser.handle_endtag   s<      DD,M,M$M
 --44T:II##D)r!   c                 :    | j                   j                  |       y)z4Handle some textual data that shows up between tags.N)r4   handle_datar   datas     r    rA   z#BeautifulSoupHTMLParser.handle_data   s    		d#r!   c                    |j                  d      rt        |j                  d      d      }n8|j                  d      rt        |j                  d      d      }nt        |      }d}|dk  r<| j                  j                  dfD ]!  }|s	 t        |g      j                  |      }# |s	 t        |      }|xs d}| j                  |       y# t        $ r
}Y d}~Xd}~ww xY w# t        t        f$ r
}Y d}~Bd}~ww xY w)zHandle a numeric character reference by converting it to the
        corresponding Unicode character and treating it as textual
        data.

        :param name: Character number, possibly in hexadecimal.
        x   XN   zwindows-1252u   �)
startswithintlstripr4   original_encoding	bytearraydecodeUnicodeDecodeErrorchr
ValueErrorOverflowErrorrA   )r   r)   	real_namerC   encodinges         r    handle_charrefz&BeautifulSoupHTMLParser.handle_charref   s     ??3DKK,b1I__S!DKK,b1ID	Is? "YY88.I $i[188BD	 9~ 22 * 
 . s$   C,C% 	C"C"%C>9C>c                 x    t         j                  j                  |      }||}nd|z  }| j                  |       y)zHandle a named entity reference by converting it to the
        corresponding Unicode character(s) and treating it as textual
        data.

        :param name: Name of the entity reference.
        Nz&%s)r   HTML_ENTITY_TO_CHARACTERgetrA   )r   r)   	characterrC   s       r    handle_entityrefz(BeautifulSoupHTMLParser.handle_entityref   s>     '??CCDI	 D 4<Dr!   c                     | j                   j                          | j                   j                  |       | j                   j                  t               y)zOHandle an HTML comment.

        :param data: The text of the comment.
        N)r4   endDatarA   r   rB   s     r    handle_commentz&BeautifulSoupHTMLParser.handle_comment   s8    
 					d#		'"r!   c                     | j                   j                          |t        d      d }| j                   j                  |       | j                   j                  t               y)zYHandle a DOCTYPE declaration.

        :param data: The text of the declaration.
        zDOCTYPE N)r4   r]   lenrA   r	   rB   s     r    handle_declz#BeautifulSoupHTMLParser.handle_decl   sI    
 			C
O$%		d#		'"r!   c                    |j                         j                  d      rt        }|t        d      d }nt        }| j
                  j                          | j
                  j                  |       | j
                  j                  |       y)z{Handle a declaration of unknown type -- probably a CDATA block.

        :param data: The text of the declaration.
        zCDATA[N)upperrI   r   r`   r   r4   r]   rA   )r   rC   clss      r    unknown_declz$BeautifulSoupHTMLParser.unknown_decl  sf    
 ::<""8,CH'DC				d#		#r!   c                     | j                   j                          | j                   j                  |       | j                  |       | j                   j                  t               y)z\Handle a processing instruction.

        :param data: The text of the instruction.
        N)r4   r]   rA   _document_might_be_xmlr
   rB   s     r    	handle_piz!BeautifulSoupHTMLParser.handle_pi  sG    
 					d###D)		/0r!   N)T)__name__
__module____qualname____doc__r2   r   r   r$   r,   r'   r(   rA   rV   r[   r^   ra   re   rh    r!   r    r   r   $   sQ     FG(8, !5-n*$$&P&##1r!   r   c                   P     e Zd ZdZdZdZeZeee	gZ
dZd fd	Z	 	 ddZd Z xZS )	r   zpA Beautiful soup `TreeBuilder` that uses the `HTMLParser` parser,
    found in the Python standard library.
    FTc                     t               }dD ]  }||v s|j                  |      }|||<    t        t        |   di | |xs g }|xs i }|j                  |       d|d<   ||f| _        y)a  Constructor.

        :param parser_args: Positional arguments to pass into 
            the BeautifulSoupHTMLParser constructor, once it's
            invoked.
        :param parser_kwargs: Keyword arguments to pass into 
            the BeautifulSoupHTMLParser constructor, once it's
            invoked.
        :param kwargs: Keyword arguments for the superclass constructor.
        )r   Fconvert_charrefsNrm   )dictr   superr   r   updateparser_args)r   rt   parser_kwargsr   extra_parser_kwargsargr;   	__class__s          r    r   zHTMLParserTreeBuilder.__init__*  s     #f. 	1Cf}

3+0#C(	1 	#T3=f=!'R%+01,1()'7r!   c              #      K   t        |t              r	|dddf y|g}|g}||g}t        |||d|      }|j                  |j                  |j
                  |j                  f yw)a  Run any preliminary steps necessary to make incoming markup
        acceptable to the parser.

        :param markup: Some markup -- probably a bytestring.
        :param user_specified_encoding: The user asked to try this encoding.
        :param document_declared_encoding: The markup itself claims to be
            in this encoding.
        :param exclude_encodings: The user asked _not_ to try any of
            these encodings.

        :yield: A series of 4-tuples:
         (markup, encoding, declared encoding,
          has undergone character replacement)

         Each 4-tuple represents a strategy for converting the
         document to Unicode and parsing it. Each strategy will be tried 
         in turn.
        NFT)known_definite_encodingsuser_encodingsis_htmlexclude_encodings)
isinstancestrr   markuprL   declared_html_encodingcontains_replacement_characters)	r   r   user_specified_encodingdocument_declared_encodingr}   rz   r{   try_encodingsdammits	            r    prepare_markupz$HTMLParserTreeBuilder.prepare_markupC  s     * fc"4u-- %<#<  5502LM%=)/
 }}f66,,557 	7s   A%A'c                     | j                   \  }}t        |i |}| j                  |_        	 |j                  |       |j	                          g |_        y# t
        $ r}t        |      d}~ww xY w)z{Run some incoming markup through some parsing process,
        populating the `BeautifulSoup` object in self.soup.
        N)rt   r   r4   feedcloseAssertionErrorr   r   )r   r   r   r   parserrU   s         r    r   zHTMLParserTreeBuilder.feedt  sp     ''f($9&9ii	*KKLLN /1+  	* 'q))		*s   !A 	A/A**A/)NN)NNN)ri   rj   rk   rl   is_xml	picklable
HTMLPARSERNAMEr   r   featuresTRACKS_LINE_NUMBERSr   r   r   __classcell__)rx   s   @r    r   r     sF     FIDdF#H 82 >BJN/7b1r!   )rl   __license____all__html.parserr   syswarningsbs4.elementr   r   r   r	   r
   
bs4.dammitr   r   bs4.builderr   r   r   r   r   r   r   r   rm   r!   r    <module>r      sf    I   # 
   9  
v1j*@ v1rf1O f1r!   