
    Fwg                         d dl mZmZmZ d dlmZ 	 d dlmZ d dlm	Z
mZ e
e_	        ee_        dgZ G d de      Z G d de      Z G d	 d
e      Zy)    )absolute_importdivisionunicode_literals)str)urllib)parserequestRobotFileParserc                   H    e Zd ZdZddZd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zy)r
   zs This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    c                 l    g | _         d | _        d| _        d| _        | j	                  |       d| _        y )NFr   )entriesdefault_entrydisallow_all	allow_allset_urllast_checkedselfurls     Z/var/www/horilla/myenv/lib/python3.12/site-packages/future/backports/urllib/robotparser.py__init__zRobotFileParser.__init__   s4    !!S    c                     | j                   S )zReturns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        )r   r   s    r   mtimezRobotFileParser.mtime&   s        r   c                 6    ddl }|j                         | _        y)zYSets the time the robots.txt file was last fetched to the
        current time.

        r   N)timer   )r   r   s     r   modifiedzRobotFileParser.modified/   s    
 	 IIKr   c                 p    || _         t        j                  j                  |      dd \  | _        | _        y)z,Sets the URL referring to a robots.txt file.      N)r   r   r   urlparsehostpathr   s     r   r   zRobotFileParser.set_url7   s-    %||44S9!A>	49r   c                    	 t         j                  j                  | j                        }|j	                         }| j                  |j                  d      j                                y# t         j                  j                  $ r@}|j                  dv rd| _        n |j                  dk\  rd| _        Y d}~yY d}~yY d}~yd}~ww xY w)z4Reads the robots.txt URL and feeds it to the parser.zutf-8)i  i  Ti  N)r   r	   urlopenr   readr   decode
splitlineserror	HTTPErrorcoder   r   )r   frawerrs       r   r'   zRobotFileParser.read<   s    		9&&txx0A &&(CJJszz'*5578 ||%% 	&xx:%$(!S!% ! "	&s   )A* *C,CCc                     d|j                   v r| j                  || _        y y | j                  j                  |       y N*)
useragentsr   r   appendr   entrys     r   
_add_entryzRobotFileParser._add_entryI   s=    %"""!!)%*" * LL&r   c                    d}t               }|D ]  }|s4|dk(  rt               }d}n"|dk(  r| j                  |       t               }d}|j                  d      }|dk\  r|d| }|j                         }|sh|j	                  dd      }t        |      dk(  s|d   j                         j                         |d<   t        j                  j                  |d   j                               |d<   |d   dk(  rB|dk(  r| j                  |       t               }|j                  j                  |d          d}*|d   dk(  r3|dk7  s9|j                  j                  t        |d   d	             d}e|d   d
k(  so|dk7  sv|j                  j                  t        |d   d             d} |dk(  r| j                  |       yy)zParse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        r   r       #N:z
user-agentdisallowFallowT)Entryr7   findstripsplitlenlowerr   r   unquoter3   r4   	rulelinesRuleLine)r   linesstater6   lineis         r   r   zRobotFileParser.parseR   s     !	"DA:!GEEaZOOE*!GEE		#AAvBQx::<D::c1%D4yA~q'--///1Q ,,..tAw}}?Q7l*z. %$$++DG4E!W
*z..xQ/GH !!W'z..xQ/FG !C!	"D A:OOE" r   c                 H   | j                   ry| j                  ryt        j                  j	                  t        j                  j                  |            }t        j                  j                  dd|j                  |j                  |j                  |j                  f      }t        j                  j                  |      }|sd}| j                  D ]&  }|j                  |      s|j                  |      c S  | j                  r| j                  j                  |      S y)z=using the parsed robots.txt decide if useragent can fetch urlFT /)r   r   r   r   r"   rD   
urlunparser$   paramsqueryfragmentquoter   
applies_to	allowancer   )r   	useragentr   
parsed_urlr6   s        r   	can_fetchzRobotFileParser.can_fetch   s    >> \\**6<<+?+?+DE
ll%%r"Z__j..
0C0C'E Fll  %C\\ 	,E	*s++	, %%//44r   c                 v    dj                  | j                  D cg c]  }t        |      dz    c}      S c c}w )NrL   
)joinr   r   r5   s     r   __str__zRobotFileParser.__str__   s,    wwt||DeE
T)DEEDs   6N)rL   )__name__
__module____qualname____doc__r   r   r   r   r'   r7   r   rW   r[    r   r   r
   r
      s6    
!(?
9'0#f.Fr   c                   "    e Zd ZdZd Zd Zd Zy)rF   zoA rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.c                 l    |dk(  r|sd}t         j                  j                  |      | _        || _        y )NrL   T)r   r   rR   r$   rT   )r   r$   rT   s      r   r   zRuleLine.__init__   s-    2:iILL&&t,	"r   c                 Z    | j                   dk(  xs |j                  | j                         S r1   )r$   
startswith)r   filenames     r   rS   zRuleLine.applies_to   s%    yyCA8#6#6tyy#AAr   c                 J    | j                   xr dxs ddz   | j                  z   S )NAllowDisallowz: )rT   r$   r   s    r   r[   zRuleLine.__str__   s$    *78jD@499LLr   N)r\   r]   r^   r_   r   rS   r[   r`   r   r   rF   rF      s    1#BMr   rF   c                   (    e Zd ZdZd Zd Zd Zd Zy)r>   z?An entry has one or more user-agents and zero or more rulelinesc                      g | _         g | _        y )N)r3   rE   r   s    r   r   zEntry.__init__   s    r   c                     g }| j                   D ]  }|j                  d|dg        | j                  D ]  }|j                  t        |      dg         dj	                  |      S )NzUser-agent: rY   rL   )r3   extendrE   r   rZ   )r   retagentrI   s       r   r[   zEntry.__str__   sc    __ 	6EJJt45	6NN 	*DJJD	4()	*wws|r   c                     |j                  d      d   j                         }| j                  D ]  }|dk(  r y|j                         }||v s y y)z2check if this entry applies to the specified agentrM   r   r2   TF)rA   rC   r3   )r   rU   rn   s      r   rS   zEntry.applies_to   sX     OOC(+113	__ 	E|KKME	!	 r   c                 d    | j                   D ]!  }|j                  |      s|j                  c S  y)zZPreconditions:
        - our agent applies to this entry
        - filename is URL decodedT)rE   rS   rT   )r   re   rI   s      r   rT   zEntry.allowance   s2     NN 	&Dx(~~%	& r   N)r\   r]   r^   r_   r   r[   rS   rT   r`   r   r   r>   r>      s    Ir   r>   N)
__future__r   r   r   future.builtinsr   future.backportsr   future.backports.urllibr   _parser	   _request__all__objectr
   rF   r>   r`   r   r   <module>ry      s^    B B 
 $ H 
EFf EFPMv M""F "r   