
    Owg              	       *   d dl mZ d dlmZ d dlmZmZ d dlZd dlm	Z	 d dl
Z
d dlZd dlmZ d dlZd dlZd dlmZ d dlmc mZ d dlZd dlmZmZmZmZmZmZmZm Z m!Z! d dl"m#Z$ d d	l%m&Z&m'Z' d d
l(m)Z)  ejT                  g d      d        Z+d Z,d Z-d Z.d Z/ ejT                   ej`                  d ejb                  d       ejb                  d      g       ej`                  d ejb                  d            g      d        Z2 G d d      Z3y)    )Iterator)partial)BytesIOStringION)Path)URLError)is_platform_windows)	NA	DataFrame
MultiIndexSeries	Timestamp
date_rangeread_csv	read_htmlto_datetime)ArrowStringArrayStringArray)file_path_to_url)zchinese_utf-16.htmlzchinese_utf-32.htmlzchinese_utf-8.htmlzletz_latin1.html)paramsc                 ,     |ddd| j                         S )z6Parametrized fixture for HTML encoding test filenames.iodatahtml_encoding)param)requestdatapaths     P/var/www/horilla/myenv/lib/python3.12/site-packages/pandas/tests/io/test_html.pyhtml_encoding_filer   (   s     D&/7==AA    c                 6   t        |       t        |      k(  sJ dt        |        dt        |              d}t        t        d | |            }|sJ |       t        | |      D ]2  \  }}t	        j
                  ||g|i | |j                  s-J d        y )Nz*lists are not of equal size len(list1) == z, len(list2) == z$not all list elements are DataFramesc                 F    t        | t              xr t        |t              S N
isinstancer   )xys     r   <lambda>z(assert_framelist_equal.<locals>.<lambda>>   s    Ay1NjI6N r    zframes are both empty)lenallmapziptmassert_frame_equalempty)list1list2argskwargsmsgboth_framesframe_iframe_js           r   assert_framelist_equalr8   5   s    u:U# 	U %U	&#
 1CN	
K ;u- :
gw@@@==9"99 :r    c           	      
   t        j                  d      }t        j                  d       | j                  |dd       t        j                  t        d      5  t         |ddd	d
      d       d d d        y # 1 sw Y   y xY w)Nbs4html5lib__version__z4.2zPandas requires versionmatchr   r   html	spam.htmlflavor)pytestimportorskipsetattrraisesImportErrorr   )monkeypatchr   r:   s      r   test_bs4_version_failsrI   I   so    


e
$C

#]E2	{*C	D M(4=eLM M Ms   A99Bc                      d} d}d|z   dz   }t        j                  t        |      5  t        t	        |       d|       d d d        y # 1 sw Y   y xY w)Nz
google.comzinvalid flavorz\{z \} is not a valid set of flavorsr=   googler>   rB   )rC   rF   
ValueErrorr   r   )urlrB   r4   s      r   test_invalid_flavorrO   R   sR    
CF
&.>
>C	z	- @(3-x?@ @ @s   A		Ac                     t        j                  d       t        j                  d       t        j                  d        | dddd      }t        |ddg	      }t        |ddg	      }t        ||       y )
Nr:   lxmlr;   r   r   r?   valid_markup.htmlr   )	index_colrB   )rC   rD   r   r8   )r   filenamedfs_lxmldfs_bs4s       r   test_same_orderingrW   [   sg    



#ff.ABHQx@HAug>G8W-r    r:   r;   )marksrQ   c                 8    t        t        | j                        S )NrA   )r   r   r   )r   s    r   flavor_read_htmlrZ   f   s     9W]]33r    c            	       h	   e Zd Zd Zej
                  d        Zej
                  d        Zd Zd Z	ej                  j                  ej                  j                  d               Zej                  j                  ej                  j                  d               Zej                  j                  d        Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d Z!d Z"d Z#d Z$ej                  j                  ej                  j                  d               Z%ej                  j                  ej                  j                  ej                  j                  d                      Z&ej                  j                  d        Z'ej                  j                  d         Z(ej                  j                  d!        Z)ej                  j                  d"        Z*ej                  j                  d#        Z+ej                  j                  d$        Z,ej                  j                  d%        Z-ej                  j                  d&        Z.ej                  j                  d'        Z/d( Z0ej
                  d)        Z1ej                  j                  ej                  j                  d*               Z2ej                  j                  ej                  j                  d+               Z3d, Z4d- Z5d. Z6d/ Z7d0 Z8d1 Z9ej                  j                  d2        Z:ej                  j                  d3        Z;d4 Z<d5 Z=d6 Z>d7 Z?d8 Z@d9 ZAd: ZBd; ZCd< ZDd= ZEd> ZFd? ZGd@ ZHej                  j                  dAdBdCg      dD        ZJdE ZKdF ZLdG ZMdH ZNdI ZOdJ ZPdK ZQej                  j                  dL        ZRdM ZSdN ZTej                  j                  dOdB eUdPg      dQfdC eUdRg       eUdPg      fg      dS        ZVej                  j                  dTdBdCg      dU        ZWej                  j                  dV      dW        ZYdX ZZdY Z[ej                  j                  ej                  j                  dZ               Z\d[ Z]d\ Z^ej                  j                  dAg d]      d^        Z_d_ Z`d` Zada Zbdb ZcyQ)cTestReadHtmlc                 x    d}t        j                  t        |      5   |d       d d d        y # 1 sw Y   y xY w)NzPassing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.r=   a  <table>
                <thead>
                    <tr>
                        <th>A</th>
                        <th>B</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1</td>
                        <td>2</td>
                    </tr>
                </tbody>
                <tbody>
                    <tr>
                        <td>3</td>
                        <td>4</td>
                    </tr>
                </tbody>
            </table>)r-   assert_produces_warningFutureWarning)selfrZ   r4   s      r   test_literal_html_deprecationz*TestReadHtml.test_literal_html_deprecationq   s@    > 	 ''SA 		 	 	s   	09c                      |dddd      S )Nr   r   r?   r@    r`   r   s     r   	spam_datazTestReadHtml.spam_data   s    ffk::r    c                      |dddd      S )Nr   r   r?   banklist.htmlrc   rd   s     r   banklist_datazTestReadHtml.banklist_data   s    ffo>>r    c                    t        t        j                  j                  d      j                  d      t	        j
                  t        d      t                    j                  dj                        j                  t              }|j                         } |t        |      ddid	
      d	   }t        j                  ||       y )N   )      abcdtypecolumnsz{:.3f}class	dataframer   )attrsrS   )r   nprandomdefault_rngpdIndexlistobjectr+   formatastypefloatto_htmlr   r-   r.   )r`   rZ   dfoutress        r   test_to_html_compatz TestReadHtml.test_to_html_compat   s    		%%a(//7eF;
 S!&&- 	 jjlSM';!71

 	c2&r    c                 v   t        t        dt        j                  dgd      t        g dd      t        dt        j                  dgd      t        g d	d      g d
g dg dg dd      }|dk(  rft	        t        j
                  g dt        j                              }t	        t        j
                  ddt        gt        j                              }n|dk(  rNt        j                  d      }ddl
m}  ||j                  g d            } ||j                  g d            }nMt        j                  d      }t        |j                  g d            }t        |j                  g d            }|j                  d      }	t        j                  d|      5   |t!        |	      |      d   }
d d d        t        t        dt        j                  dgd      t        g dd      t        dt        j                  dgd      t        g d	d      t        ddt        gd      t        g dd      ||d      }|dk(  rHdd l}ddl
m} t        |j$                  D ci c]  }| ||j                  ||   d            ! c}      }t'        j(                  
|       y # 1 sw Y   xY wc c}w )N   rl   Int64rn   )r   rj   rl         ?      @Float64)r   g       @r   )TFN)TFTabc)r   r   N)r   r   r   defghpythonr   r   pyarrowr   )ArrowExtensionArrayFindexzmode.string_storagedtype_backendTboolean)from_pandas)r   r   ru   nanr   arrayobject_r
   rC   rD   pandas.arraysr   r   r   rx   option_contextr   r   rq   r-   r.   )r`   string_storager   rZ   r   string_arraystring_array_napar   r   resultexpectedcols                r   test_dtype_backendzTestReadHtml.test_dtype_backend   s@   QN':IW5S"&&#.i@O9=(($%	
 X%&rxxrzz'RSL)"((Cb>*TUOi'$$Y/B9.rxx/HIL1"((;K2LMO$$Y/B+BHH_,EFL.rxx8H/IJOjjuj%4nE 	U%hsm=QRSTF	U QN':IW5S"&&#.i@O9=T5"-Y?/yA!$	
 I% 9   (// ,RXXhsmQUX-VWWH 	fh/9	U 	U,s   "J*($J6*J3c                     t        |d      5 }|j                  |j                                 ||j                  d      } ||j                  d      }d d d        t	               y # 1 sw Y   xY w)Nutf-8encodingcontentFirst Federal Bank of Floridar=   Metcalf Bankopenserve_contentreadrN   r8   )r`   
httpserverrh   rZ   r   df1df2s          r   test_banklist_urlzTestReadHtml.test_banklist_url   sr     -'2 	a$$QVVX$6"5C #$C	 	sC(	 	   A	A,,A5c                     t        |d      5 }|j                  |j                                 ||j                  d      } ||j                  d      }d d d        t	               y # 1 sw Y   xY w)Nr   r   r   	.*Water.*r=   Unitr   )r`   r   re   rZ   r   r   r   s          r   test_spam_urlzTestReadHtml.test_spam_url   sl     )g. 	A!$$QVVX$6":>>EC":>>@C	A
 	sC(	A 	Ar   c                 P     ||dddi      } ||dddi      }t        ||       y )Nz.*Florida.*idtabler>   rt   r   r8   )r`   rh   rZ   r   r   s        r   test_banklistzTestReadHtml.test_banklist   s:    tWo
 g
 	sC(r    c                      ||d      } ||d      }t        ||       |d   j                  d   dk(  sJ |d   j                  d   dk(  sJ y )Nr   r=   r   r   r   r   
ProximatesNutrient)r8   ilocrq   r`   re   rZ   r   r   s        r   	test_spamzTestReadHtml.test_spam  sZ    y<y7sC(1v{{4 L0001v~~a J...r    c                 F     ||      }|D ]  }t        |t              rJ  y r#   r$   )r`   re   rZ   dfsr   s        r   test_spam_no_matchzTestReadHtml.test_spam_no_match  s+    y) 	-Bb),,,	-r    c                 N     ||ddi      }|D ]  }t        |t              rJ  y )Nr   r   )rt   r$   )r`   rh   rZ   r   r   s        r   test_banklist_no_matchz#TestReadHtml.test_banklist_no_match  s1    }T7OD 	-Bb),,,	-r    c                 d     ||dd      d   }|j                   d   dk(  sJ |j                  rJ y )Nr   rj   r>   headerr   r   )rq   r/   )r`   re   rZ   r   s       r   test_spam_headerzTestReadHtml.test_spam_header  s9    i{1EaHzz!},,,88|8r    c                 H     ||dd      } ||dd      }t        ||       y Nr   r   r>   skiprowsr   r   r   s        r   test_skiprows_intzTestReadHtml.test_skiprows_int"  (    yaHyCsC(r    c                 l     ||dt        d            } ||dt        d            }t        ||       y Nr   rj   r   r   )ranger8   r   s        r   test_skiprows_rangez TestReadHtml.test_skiprows_range(  0    yeAhOyqJsC(r    c                 P     ||dddg      } ||dddg      }t        ||       y Nr   r   rj   r   r   r   r   s        r   test_skiprows_listzTestReadHtml.test_skiprows_list.  0    yq!fMy!QHsC(r    c                 P     ||dddh      } ||dddh      }t        ||       y r   r   r   s        r   test_skiprows_setzTestReadHtml.test_skiprows_set4  r   r    c                 H     ||dd      } ||dd      }t        ||       y r   r   r   s        r   test_skiprows_slicez TestReadHtml.test_skiprows_slice:  r   r    c                 l     ||dt        d            } ||dt        d            }t        ||       y r   slicer8   r   s        r   test_skiprows_slice_shortz&TestReadHtml.test_skiprows_slice_short@  r   r    c           	      r     ||dt        dd            } ||dt        ddd            }t        ||       y )	Nr   rj      r   r   rk   r   r   r   s        r   test_skiprows_slice_longz%TestReadHtml.test_skiprows_slice_longF  s6    yeAqkRyq!RQsC(r    c                      ||dt        j                  d            } ||dt        j                  d            }t        ||       y r   )ru   aranger8   r   s        r   test_skiprows_ndarrayz"TestReadHtml.test_skiprows_ndarrayL  s9    ybiiPQlSy1NsC(r    c                 z    t        j                  t        d      5   ||dd       d d d        y # 1 sw Y   y xY w)Nz%is not a valid type for skipping rowsr=   r   asdfr   )rC   rF   	TypeError)r`   re   rZ   s      r   test_skiprows_invalidz"TestReadHtml.test_skiprows_invalidR  s6    ]]9-TV 	LYkFK	L 	L 	Ls   1:c                 H     ||dd      } ||dd      }t        ||       y Nr   r   r>   rS   r   r   r   s        r   
test_indexzTestReadHtml.test_indexV  s(    yqIy!DsC(r    c                 L     ||ddd      } ||ddd      }t        ||       y Nr   r   r   )r>   r   rS   r   r   r   s        r   test_header_and_index_no_typesz+TestReadHtml.test_header_and_index_no_types[  -    yAQRSyqANsC(r    c                 L     ||ddd      } ||ddd      }t        ||       y r   r   r   s        r    test_header_and_index_with_typesz-TestReadHtml.test_header_and_index_with_types`  r   r    c                 H     ||dd      } ||dd      }t        ||       y r   r   r   s        r   test_infer_typeszTestReadHtml.test_infer_typese  s(    yqIy!DsC(r    c                 0   t        |d      5 }t        |j                               }d d d        t        |d      5 }t        |j                               }d d d         |d      } |d      }t        ||       y # 1 sw Y   YxY w# 1 sw Y   6xY wNzUTF-8r   r   r=   r   )r   r   r   r8   )r`   re   rZ   r   data1data2r   r   s           r   test_string_iozTestReadHtml.test_string_iok  s    )g. 	'!QVVX&E	' )g. 	'!QVVX&E	' uK8uF3sC(	' 	'	' 	's   B B B	Bc                     t        |d      5 }|j                         }d d d         |t              d      } |t        |      d      }t        ||       y # 1 sw Y   <xY wr  )r   r   r   r8   )r`   re   rZ   r   r   r   r   s          r   test_stringzTestReadHtml.test_stringv  sZ    )g. 	!668D	 x~[Ax~V<sC(	 	s   AA#c                     t        |d      5 } ||d      }d d d        t        |d      5 } ||d      }d d d        t               y # 1 sw Y   6xY w# 1 sw Y   "xY wr  )r   r8   )r`   re   rZ   r   r   r   s         r   test_file_likezTestReadHtml.test_file_like  sj    )g. 	9!"1K8C	9 )g. 	4!"1F3C	4 	sC(	9 	9	4 	4s   AAAA#c                     |j                  dd       t        j                  t        d      5   |dd       d d d        y # 1 sw Y   y xY w)Nz#urlopen error unknown url type: git  coder=   zgit://github.comr   )r   rC   rF   r   r`   r   rZ   s      r   test_bad_url_protocolz"TestReadHtml.test_bad_url_protocol  sK     	  !FS Q]]8+PQ 	D/{C	D 	D 	Ds   AAc                     |j                  dd       t        j                  t        t        fd      5   ||j
                  d       d d d        y # 1 sw Y   y xY w)NzName or service not knownr  r  zHTTP Error 404: NOT FOUNDr=   r   )r   rC   rF   r   rM   rN   r  s      r   test_invalid_urlzTestReadHtml.test_invalid_url  sS     	  !<3 G]]Hj19TU 	@Z^^;?	@ 	@ 	@s   AAc                     |} |t        t        j                  j                  |            dddi      }t	        |t
              sJ |D ]  }t	        |t              rJ  y )NFirstr   r   r   )r   ospathabspathr%   rz   r   r`   rh   rZ   rN   r   r   s         r   test_file_urlzTestReadHtml.test_file_url  s`    RWW__S12'$PW
 #t$$$ 	-Bb),,,	-r    c                     |}t        j                  t        d      5   ||dddi       d d d        y # 1 sw Y   y xY w)NzNo tables foundr=   r   r   	tasdfabler   rC   rF   rM   )r`   rh   rZ   rN   s       r   test_invalid_table_attrsz%TestReadHtml.test_invalid_table_attrs  s@    ]]:->? 	:4BU	 	 	   5>c                 b     ||dddiddg      d   }t        |j                  t              sJ y )NMetcalfr   r   r   r   )r>   rt   r   r%   rq   r   r`   rh   rZ   r   s       r   test_multiindex_headerz#TestReadHtml.test_multiindex_header  s;    4/1a&

 "**j111r    c                 b     ||dddiddg      d   }t        |j                  t              sJ y )Nr   r   r   r   r   )r>   rt   rS   )r%   r   r   r"  s       r   test_multiindex_indexz"TestReadHtml.test_multiindex_index  s<    4/aQRV

 "((J///r    c                      ||dddiddgddg      d   }t        |j                  t              sJ t        |j                  t              sJ y )Nr   r   r   r   r   )r>   rt   r   rS   )r%   rq   r   r   r"  s       r   test_multiindex_header_indexz)TestReadHtml.test_multiindex_header_index  s[    /q6!f
  "**j111"((J///r    c                 d     ||dddiddgd      d   }t        |j                  t              sJ y Nr   r   r   r   r   )r>   rt   r   r   r!  r"  s       r   &test_multiindex_header_skiprows_tuplesz3TestReadHtml.test_multiindex_header_skiprows_tuples  C    /q6
  "**j111r    c                 d     ||dddiddgd      d   }t        |j                  t              sJ y r)  r!  r"  s       r   test_multiindex_header_skiprowsz,TestReadHtml.test_multiindex_header_skiprows  r+  r    c                      ||dddiddgddgd      d   }t        |j                  t              sJ t        |j                  t              sJ y )Nr   r   r   r   r   )r>   rt   r   rS   r   )r%   r   r   rq   r"  s       r   %test_multiindex_header_index_skiprowsz2TestReadHtml.test_multiindex_header_index_skiprows  s^    /q6!f
  "((J///"**j111r    c                    |} |t        t        j                  j                  |            t	        j
                  t	        j
                  d            ddi      }t        |t              sJ |D ]  }t        |t              rJ  y )NFloridar   r   r   )	r   r  r  r  recompiler%   rz   r   r  s         r   test_regex_idempotencyz#TestReadHtml.test_regex_idempotency  ss    RWW__S12**RZZ	23/

 #t$$$ 	-Bb),,,	-r    c                 ~    d}t        j                  t        |      5   ||dd       d d d        y # 1 sw Y   y xY w)Nz\(you passed a negative value\)r=   Waterr   r   r  )r`   re   rZ   r4   s       r   test_negative_skiprowsz#TestReadHtml.test_negative_skiprows  s:    0]]:S1 	DYgC	D 	D 	Ds   3<c                      y)Naf  
          <table class="contentstable" align="center"><tr>
            <td width="50%">
            <p class="biglink"><a class="biglink" href="whatsnew/2.7.html">What's new in Python 2.7?</a><br/>
                <span class="linkdescr">or <a href="whatsnew/index.html">all "What's new" documents</a> since 2.0</span></p>
            <p class="biglink"><a class="biglink" href="tutorial/index.html">Tutorial</a><br/>
                <span class="linkdescr">start here</span></p>
            <p class="biglink"><a class="biglink" href="library/index.html">Library Reference</a><br/>
                <span class="linkdescr">keep this under your pillow</span></p>
            <p class="biglink"><a class="biglink" href="reference/index.html">Language Reference</a><br/>
                <span class="linkdescr">describes syntax and language elements</span></p>
            <p class="biglink"><a class="biglink" href="using/index.html">Python Setup and Usage</a><br/>
                <span class="linkdescr">how to use Python on different platforms</span></p>
            <p class="biglink"><a class="biglink" href="howto/index.html">Python HOWTOs</a><br/>
                <span class="linkdescr">in-depth documents on specific topics</span></p>
            </td><td width="50%">
            <p class="biglink"><a class="biglink" href="installing/index.html">Installing Python Modules</a><br/>
                <span class="linkdescr">installing from the Python Package Index &amp; other sources</span></p>
            <p class="biglink"><a class="biglink" href="distributing/index.html">Distributing Python Modules</a><br/>
                <span class="linkdescr">publishing modules for installation by others</span></p>
            <p class="biglink"><a class="biglink" href="extending/index.html">Extending and Embedding</a><br/>
                <span class="linkdescr">tutorial for C/C++ programmers</span></p>
            <p class="biglink"><a class="biglink" href="c-api/index.html">Python/C API</a><br/>
                <span class="linkdescr">reference for C/C++ programmers</span></p>
            <p class="biglink"><a class="biglink" href="faq/index.html">FAQs</a><br/>
                <span class="linkdescr">frequently asked questions (with answers!)</span></p>
            </td></tr>
        </table>

        <p><strong>Indices and tables:</strong></p>
        <table class="contentstable" align="center"><tr>
            <td width="50%">
            <p class="biglink"><a class="biglink" href="py-modindex.html">Python Global Module Index</a><br/>
                <span class="linkdescr">quick access to all modules</span></p>
            <p class="biglink"><a class="biglink" href="genindex.html">General Index</a><br/>
                <span class="linkdescr">all functions, classes, terms</span></p>
            <p class="biglink"><a class="biglink" href="glossary.html">Glossary</a><br/>
                <span class="linkdescr">the most important terms explained</span></p>
            </td><td width="50%">
            <p class="biglink"><a class="biglink" href="search.html">Search page</a><br/>
                <span class="linkdescr">search this documentation</span></p>
            <p class="biglink"><a class="biglink" href="contents.html">Complete Table of Contents</a><br/>
                <span class="linkdescr">lists all sections and subsections</span></p>
            </td></tr>
        </table>
        rc   r`   s    r   python_docszTestReadHtml.python_docs  s    -r    c                 p    |j                  |        ||j                  d      }t        |      dkD  sJ y )Nr   Pythonr=   r   )r   rN   r)   )r`   r:  r   rZ   r   s        r   test_multiple_matchesz"TestReadHtml.test_multiple_matches(  s5     	   5z~~X>3x!||r    c                     |j                  |        ||j                  d      }|D cg c]  }|j                  d   dd  }}t        |      ddgk(  sJ y c c}w )	Nr   r<  r=   r   r   rk   PythWhat)r   rN   r   sorted)r`   r:  r   rZ   r   r   zzs          r   test_python_docs_tablez#TestReadHtml.test_python_docs_table/  sc     	   5z~~X>+./RbggdmAa //bzff---- 0s   Ac                 J    d} |t        |            }t        |      dk(  sJ y)z@
        Make sure that read_html ignores empty tables.
        a  
            <table>
                <thead>
                    <tr>
                        <th>A</th>
                        <th>B</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1</td>
                        <td>2</td>
                    </tr>
                </tbody>
            </table>
            <table>
                <tbody>
                </tbody>
            </table>
        r   N)r   r)   )r`   rZ   r?   r   s       r   test_empty_tableszTestReadHtml.test_empty_tables7  s+    ( "(4.16{ar    c                      |t        d            d   }t        ddgddggddg	      }t        j                  ||       y )
Na  <table>
            <thead>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>1</td>
                    <td>2</td>
                </tr>
            </tbody>
            <tbody>
                <tr>
                    <td>3</td>
                    <td>4</td>
                </tr>
            </tbody>
        </table>r   r   rj   rl   rk   ABr   rq   r   r   r-   r.   r`   rZ   r   r   s       r   test_multiple_tbodyz TestReadHtml.test_multiple_tbodyR  sT     "
. /2 Aq6Aq6"2S#JG
fh/r    c                 x     |t        d            d   }t        ddidg      }t        j                  ||       y)zt
        Don't fail with bs4 when there is a header and only one column
        as described in issue #9178
        a3  <table>
                <thead>
                    <tr>
                        <th>Header</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>first</td>
                    </tr>
                </tbody>
            </table>r   Headerfirstr   r   NrJ  rK  s       r   test_header_and_one_columnz'TestReadHtml.test_header_and_one_columnr  sJ    
 "
 " 8W"5aSA
fh/r    c                 |     |t        d            d   }t        g dgg d      }t        j                  ||       y)zK
        Ensure parser adds <tr> within <thead> on malformed HTML.
        a  <table>
            <thead>
                <tr>
                    <th>Country</th>
                    <th>Municipality</th>
                    <th>Year</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>Ukraine</td>
                    <th>Odessa</th>
                    <td>1944</td>
                </tr>
            </tbody>
        </table>r   )UkraineOdessa  )CountryMunicipalityYearrI  NrJ  rK  s       r   test_thead_without_trz"TestReadHtml.test_thead_without_tr  sL     "
& '* -.7

 	fh/r    c                 F   d}t        ddggddg      }t        ddgddggddg      }|j                  d	
      }|j                  d
      } |t        |            d   } |t        |            d   }t        j                  ||       t        j                  ||       y)zh
        Make sure that read_html reads tfoot, containing td or th.
        Ignores empty tfoot
        a  <table>
            <thead>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>bodyA</td>
                    <td>bodyB</td>
                </tr>
            </tbody>
            <tfoot>
                {footer}
            </tfoot>
        </table>bodyAbodyBrG  rH  rI  footAfootB )footerz%<tr><td>footA</td><th>footB</th></tr>r   N)r   r|   r   r-   r.   )	r`   rZ   data_template	expected1	expected2r  r  result1result2s	            r   test_tfoot_readzTestReadHtml.test_tfoot_read  s    
$ Wg$6#7#sL	G$w&89C:
	 $$B$/$$,S$T"8E?3A6"8E?3A6
gy1
gy1r    c                 |     |t        d      d      d   }t        ddggd      }t        j                  ||       y )Na
  
            <table>
                <tr>
                    <td>S</td>
                    <td>I</td>
                </tr>
                <tr>
                    <td>text</td>
                    <td>1944</td>
                </tr>
            </table>
        r   r   textrU  )SIrp   rJ  rK  s       r   &test_parse_header_of_non_string_columnz3TestReadHtml.test_parse_header_of_non_string_column  sO     " 
  !$ vtn-zB
fh/r    c                    ddl m fd} ||dddi      d   }t         |dd	d
d      t        t        d      }|j                  |j                  k(  sJ g d}g d}|j                  |      j                  ||      }	|j                  |      }
|	}ddg}||   j                  t              ||<   t        j                  ||
       y )Nr   )_remove_whitespacec                 8    	  |       S # t         $ r | cY S w xY wr#   )AttributeError)r&   rn  s    r   try_remove_wsz8TestReadHtml.test_banklist_header.<locals>.try_remove_ws  s'    )!,,! s    r   r   r   r   r   r   csvzbanklist.csv)Updated DateClosing Date
converters)
z,First Vietnamese American Bank In Vietnamesez"Westernbank Puerto Rico En Espanolz*R-G Premier Bank of Puerto Rico En EspanolzEurobank En EspanolzSanderson State Bank En EspanolzLWashington Mutual Bank (Including its subsidiary Washington Mutual Bank FSB)zSilver State Bank En Espanolz%AmTrade International Bank En EspanolzHamilton Bank, NA En Espanolz6The Citizens Savings Bank Pioneer Community Bank, Inc.)
zFirst Vietnamese American BankzWesternbank Puerto RicozR-G Premier Bank of Puerto RicoEurobankzSanderson State BankzWashington Mutual BankzSilver State BankzAmTrade International BankzHamilton Bank, NAzThe Citizens Savings Bankrt  rs  )pandas.io.htmlrn  r   r   shaper+   replaceapplyr   r-   r.   )r`   rh   r   rZ   rq  r   ground_trutholdnewdfnewgtnew	converted	date_colsrn  s                @r   test_banklist_headerz!TestReadHtml.test_banklist_header  s    5	 m9T7OTUVWT65.9(19M
 xx<-----

 }%--c37  /	#^4	(399+F	)
i/r    c                     d}t        |d      5 }|j                         }d d d        |v sJ  ||dddi      d   }||j                         v sJ y # 1 sw Y   4xY w)NzGold Canyonr   r   r   r   r   r   )r   r   	to_string)r`   rh   rZ   gcr   raw_textr   s          r   test_gold_canyonzTestReadHtml.test_gold_canyon  st    -'2 	 avvxH	  X~~tWo

 R\\^###	  	 s   AAc                      |t        d      d      d   } |t        d      d      d   }t        j                  ||       y )Na  <table>
                        <thead>
                            <tr style="text-align: right;">
                            <th></th>
                            <th>C_l0_g0</th>
                            <th>C_l0_g1</th>
                            <th>C_l0_g2</th>
                            <th>C_l0_g3</th>
                            <th>C_l0_g4</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <th>R_l0_g0</th>
                            <td> 0.763</td>
                            <td> 0.233</td>
                            <td> nan</td>
                            <td> nan</td>
                            <td> nan</td>
                            </tr>
                            <tr>
                            <th>R_l0_g1</th>
                            <td> 0.244</td>
                            <td> 0.285</td>
                            <td> 0.392</td>
                            <td> 0.137</td>
                            <td> 0.222</td>
                            </tr>
                        </tbody>
                    </table>r   rS   a  <table>
                    <thead>
                        <tr style="text-align: right;">
                        <th></th>
                        <th>C_l0_g0</th>
                        <th>C_l0_g1</th>
                        <th>C_l0_g2</th>
                        <th>C_l0_g3</th>
                        <th>C_l0_g4</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                        <th>R_l0_g0</th>
                        <td> 0.763</td>
                        <td> 0.233</td>
                        </tr>
                        <tr>
                        <th>R_l0_g1</th>
                        <td> 0.244</td>
                        <td> 0.285</td>
                        <td> 0.392</td>
                        <td> 0.137</td>
                        <td> 0.222</td>
                        </tr>
                    </tbody>
                 </table>)r   r-   r.   )r`   rZ   r   r   s       r   test_different_number_of_colsz*TestReadHtml.test_different_number_of_cols'  sh    # @ C"
D E"H ": =
> ?B 	fh/r    c                 |     |t        d            d   }t        g dgg d      }t        j                  ||       y )NaZ  
            <table>
                <tr>
                    <th>A</th>
                    <th colspan="1">B</th>
                    <th rowspan="1">C</th>
                </tr>
                <tr>
                    <td>a</td>
                    <td>b</td>
                    <td>c</td>
                </tr>
            </table>
        r   r   )rG  rH  Crp   rJ  rK  s       r   test_colspan_rowspan_1z#TestReadHtml.test_colspan_rowspan_1o  sD    !
" #& o.H
fh/r    c                      |t        d      d      d   }t        g dgg d      }t        j                  ||       y )Na  
            <table>
                <tr>
                    <td colspan="2">X</td>
                    <td>Y</td>
                    <td rowspan="2">Z</td>
                    <td>W</td>
                </tr>
                <tr>
                    <td>A</td>
                    <td colspan="2">B</td>
                    <td>C</td>
                </tr>
            </table>
        r   rh  )rG  rH  rH  Zr  )XzX.1Yr  WrI  rJ  rK  s       r    test_colspan_rowspan_copy_valuesz-TestReadHtml.test_colspan_rowspan_copy_values  sQ     "" %
& '* +,6Q
 	fh/r    c                      |t        d      d      d   }t        g dgg d      }t        j                  ||       y )Na(  
            <table>
                <tr>
                    <td rowspan="2">A</td>
                    <td rowspan="2" colspan="3">B</td>
                    <td>C</td>
                </tr>
                <tr>
                    <td>D</td>
                </tr>
            </table>
        r   rh  )rG  rH  rH  rH  D)rG  rH  zB.1zB.2r  rI  rJ  rK  s       r   test_colspan_rowspan_both_not_1z,TestReadHtml.test_colspan_rowspan_both_not_1  sQ     " 
  !$ +,6S
 	fh/r    c                      |t        d      d      d   }t        ddggddg      }t        j                  ||       y )Nz
            <table>
                <tr>
                    <td>A</td>
                    <td rowspan="2">B</td>
                </tr>
                <tr>
                    <td>C</td>
                </tr>
            </table>
        r   rh  r  rH  rG  rI  rJ  rK  s       r   test_rowspan_at_end_of_rowz'TestReadHtml.test_rowspan_at_end_of_row  sR     "
 
 " C:,c
C
fh/r    c                      |t        d      d      d   }t        ddgddggddg      }t        j                  ||       y )Nz
            <table>
                <tr>
                    <td rowspan="3">A</td>
                    <td rowspan="3">B</td>
                </tr>
            </table>
        r   rh  rG  rH  rI  rJ  rK  s       r   test_rowspan_only_rowsz#TestReadHtml.test_rowspan_only_rows  sY     "	 
  C:Sz":S#JO
fh/r    c                      |t        d            d   }t        ddgddggddgddgg      }t        dd	gg|
      }t        j                  ||       y )Nam  
            <table>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
                <tr>
                    <th>a</th>
                    <th>b</th>
                </tr>
                <tr>
                    <td>1</td>
                    <td>2</td>
                </tr>
            </table>
        r   rG  rH  r   r   r   levelscodesrj   rI  r   r   r   r-   r.   r`   rZ   r   rq   r   s        r   +test_header_inferred_from_rows_with_only_thz8TestReadHtml.test_header_inferred_from_rows_with_only_th  sn    !
& '* c3Z#s$<aVaQRVDTUAq6(G<
fh/r    c                    t        dt        dd      i      }|j                         } |t        |      dgd      }t	        j
                  ||d           |t        |      dgd      }t	        j
                  ||d          y )Ndate1/1/2001
   periodsr   r   parse_datesrS   )r   r   r   r   r-   r.   )r`   rZ   r   r   r   s        r   test_parse_dates_listz"TestReadHtml.test_parse_dates_list  sv    
:r BCD::<x1saP
b#a&)x1xSTU
b#a&)r    c                 $   t        t        dd            }t        |j                  d       |j                  d       d      } |t	        |j                               ddd	gid
      }t        d|i      }t        j                  ||d          y )Nr  r  r  c                 4    t        | j                               S r#   )strr  r&   s    r   r(   z7TestReadHtml.test_parse_dates_combine.<locals>.<lambda>%      AFFH r    c                 4    t        | j                               S r#   )r  timer  s    r   r(   z7TestReadHtml.test_parse_dates_combine.<locals>.<lambda>&  r  r    )r  r  datetimer   rj   r  r   )r   r   r   r+   r   r   r-   r.   )r`   rZ   	raw_datesr   r   newdfs         r   test_parse_dates_combinez%TestReadHtml.test_parse_dates_combine!  s    :j"=>	!&=>!&=>
 RZZ\"aV0DPQ
 :y12
eSV,r    c                     |dddd      }t         j                  j                  |      sJ t        |       d       t         j                  j	                  |      sJ t        |       d        ||dd	      d
   }|j
                  dk(  sJ d|j                  d   v sJ |d   j                  t        j                  d      k(  sJ t        j                  |j                  d   d      sJ y )Nr   r   r?   wikipedia_states.htmlz is not a filez is an empty fileArizonar   r   r   )<      Unnamedr   sq mifloat64)r   r  HzPN$A)r  r  isfilereprgetsizery  rq   ro   ru   allcloselocr`   r   rZ   r   r   s        r   test_wikipedia_states_tablez(TestReadHtml.test_wikipedia_states_table/  s    ff.EFww~~d#BT
|>%BB#wwt$Fd4E&FF$!$iB1E||x'''FNN2....g$$(;;;;{{6::j19===r    c                     |dddd      } ||dd      d   }|j                   dk(  sJ d	|j                  d
   d   v sJ |j                  j                  dk(  sJ t        j                  |j
                  d   d      sJ y )Nr   r   r?   r  r  r   r   )r     r  r   r   rj   )Alaska)zTotal area[2]r  r  )ry  rq   nlevelsru   r  r  r  s        r    test_wikipedia_states_multiindexz-TestReadHtml.test_wikipedia_states_multiindex9  s    ff.EF!$i1EaH||x'''FNN2.q1111~~%%***{{6::&JKYWWWr    c                      |t        d      ddg      }t        ddggt        j                  ddg      	      }t	        j
                  |d   |       y )
NaK  
                <table>
                    <thead>
                        <tr><th></th><th></tr>
                        <tr><th>A</th><th>B</th></tr>
                    </thead>
                    <tbody>
                        <tr><td>a</td><td>b</td></tr>
                    </tbody>
                </table>
            r   r   rh  r   r   )Unnamed: 0_level_0rG  )zUnnamed: 1_level_0rH  rp   )r   r   r   from_tuplesr-   r.   rK  s       r   %test_parser_error_on_empty_header_rowz2TestReadHtml.test_parser_error_on_empty_header_rowA  sd    !
 q6
  3ZL**,.IJ
 	fQi2r    c                      |t        d      d      d   }t        ddidg      }|d   j                  t        j                  d      k(  sJ t	        j
                  ||       y )	Na  <html>
            <body>
             <table>
                <thead>
                    <tr>
                        <th>Header</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1100#101</td>
                    </tr>
                </tbody>
            </table>
            </body>
        </html>#)decimalr   rN  gClg0@rP  r  )r   r   ro   ru   r-   r.   rK  s       r   test_decimal_rowszTestReadHtml.test_decimal_rowsZ  so    !$ '
( ), 8X"6qcBh%%))<<<<
fh/r    argTFc                     t        j                  d      }t        j                  t        |      5   |||       d d d        y # 1 sw Y   y xY w)NzPassing a bool to header is invalid. Use header=None for no header or header=int or list-like of ints to specify the row(s) making up the column namesr=   rh  )r2  escaperC   rF   r   )r`   re   r  rZ   r4   s        r   test_bool_header_argz!TestReadHtml.test_bool_header_argw  sF     ii

 ]]9C0 	4Ys3	4 	4 	4s   AAc                      |t        d      dt        i      d   }t        dddgi      }t        j                  ||       y )Na  <table>
                 <thead>
                   <tr>
                     <th>a</th>
                    </tr>
                 </thead>
                 <tbody>
                   <tr>
                     <td> 0.763</td>
                   </tr>
                   <tr>
                     <td> 0.244</td>
                   </tr>
                 </tbody>
               </table>r   ru  r   z0.763z0.244)r   r  r   r-   r.   rK  s       r   test_converterszTestReadHtml.test_converters  sR    !" Sz%
& '* cGW#567
fh/r    c                      |t        d      dg      d   }t        ddt        j                  gi      }t	        j
                  ||       y )Na  <table>
                 <thead>
                   <tr>
                     <th>a</th>
                   </tr>
                 </thead>
                 <tbody>
                   <tr>
                     <td> 0.763</td>
                   </tr>
                   <tr>
                     <td> 0.244</td>
                   </tr>
                 </tbody>
               </table>gZd;?)	na_valuesr   r   g"~j?r   r   ru   r   r-   r.   rK  s       r   test_na_valueszTestReadHtml.test_na_values  sS    !" g%
& '* cE266?34
fh/r    c                 ,   d}t        dddgi      } |t        |      d      d   }t        j                  ||       t        dt        j
                  t        j
                  gi      } |t        |      d      d   }t        j                  ||       y )	Na  <table>
                        <thead>
                            <tr>
                            <th>a</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <td> N/A</td>
                            </tr>
                            <tr>
                            <td> NA</td>
                            </tr>
                        </tbody>
                    </table>r   zN/Ar
   F)keep_default_nar   T)r   r   r-   r.   ru   r   )r`   rZ   	html_dataexpected_dfhtml_dfs        r   test_keep_default_naz!TestReadHtml.test_keep_default_na  s     	   udm 45"8I#6NqQ
k73rvvrvv&6 78"8I#6MaP
k73r    c                      |t        d            d   }t        ddgt        j                  t        j                  ggddg      }t	        j
                  ||       y )Nak  
            <table>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
                <tr>
                    <td>a</td>
                    <td>b</td>
                </tr>
                <tr>
                    <td></td>
                    <td></td>
                </tr>
            </table>
        r   r   r   rG  rH  rI  r  rK  s       r   test_preserve_empty_rowsz%TestReadHtml.test_preserve_empty_rows  s\    !
& '* C:/?"@3PS*U
fh/r    c                      |t        d            d   }t        ddgddggddgddgg      }t        dd	gg|
      }t        j                  ||       y )NaU  
            <table>
                <thead>
                    <tr><th></th><th></tr>
                    <tr><th>A</th><th>B</th></tr>
                    <tr><th>a</th><th>b</th></tr>
                </thead>
                <tbody>
                    <tr><td>1</td><td>2</td></tr>
                </tbody>
            </table>
        r   rG  rH  r   r   r   r  rj   rI  r  r  s        r   ,test_ignore_empty_rows_when_inferring_headerz9TestReadHtml.test_ignore_empty_rows_when_inferring_header  sn    !
 " c3Z#s$<aVaQRVDTUAq6(G<
fh/r    c                     t        g d      }g dg dg|_        |j                  d      } |t        |            d   }t	        j
                  ||       y )N))HillaryD   r  )BernieJ   r  )DonaldE   R)r   )r  AgeParty)NamezUnnamed: 1_level_1zUnnamed: 2_level_1Fr   r   )r   rq   r   r   r-   r.   )r`   rZ   r  r?   r  s        r   test_multiple_header_rowsz&TestReadHtml.test_multiple_header_rows  sY    Q
 3@
 """/"8D>215
k73r    c                 |     |dddd      } ||d      }t        |t              sJ t        |d   t              sJ y )Nr   r   r?   rR   r   r  )r%   rz   r   )r`   r   rZ   rT   r   s        r   test_works_on_valid_markupz'TestReadHtml.test_works_on_valid_markup  sC    D&&2EFx15#t$$$#a&),,,r    c                 4     |dddd      } ||dddg       y )	Nr   r   r?   rg   r   rQ   r;   rL   rc   )r`   r   rZ   rh   s       r   test_fallback_successz"TestReadHtml.test_fallback_success  s#     vvGk6:BVWr    c                     t        dd      }t        t        j                  j	                  d      j                  d      |      }|j                         }d|v sJ y )Nz
2000-01-01r  r  rj   )r  rk   r   )r   r   ru   rv   rw   standard_normalr   )r`   rngr   r   s       r   test_to_html_timestampz#TestReadHtml.test_to_html_timestamp  sN    r2ryy,,Q/??HPSTv%%%r    c                 ^   t        dddg      }|j                         }|j                  d      }|j                  d      }|j                  d      }|j                  d      }|j                  d      }d|v sJ ||k(  sJ ||k(  sJ ||k7  sJ d	|v sJ d
|vsJ d|vsJ ||k(  sJ y )Nr   rj   rG  rH  T)borderr   Fz border="1"z border="2"z border="0"z border)r   r   )r`   r   out_border_defaultout_border_trueout_border_explicit_defaultout_border_nondefaultout_border_zeroout_border_falses           r   test_to_html_borderlessz$TestReadHtml.test_to_html_borderless"  s    aa()*ZZ\**D*1&(jjj&:# "

!
 4**A*.::U:3 2222"4444!%@@@@!%:::: 5555O333 0000"2222r    zdisplayed_only,exp0,exp1fooNzfoo  bar  baz  quxc                     d} |t        |      |      }t        j                  |d   |       |t        j                  |d   |       y t        |      dk(  sJ y )Na  <html>
          <body>
            <table>
              <tr>
                <td>
                  foo
                  <span style="display:none;text-align:center">bar</span>
                  <span style="display:none">baz</span>
                  <span style="display: none">qux</span>
                </td>
              </tr>
            </table>
            <table style="display: none">
              <tr>
                <td>foo</td>
              </tr>
            </table>
          </body>
        </html>displayed_onlyr   r   )r   r-   r.   r)   )r`   r	  exp0exp1rZ   r   r   s          r   test_displayed_onlyz TestReadHtml.test_displayed_only5  sY    ( x~nM
c!fd+!!#a&$/s8q= =r    r	  c                     d} |t        |      |      d   }t        ddgddgd      }t        j                  ||       y )	NaW  
        <table>
            <tr>
                <th>A</th>
                <th>B</th>
            </tr>
            <tr>
                <td>1</td>
                <td>2</td>
            </tr>
            <tr>
                <td><span style="display:none"></span>4</td>
                <td>5</td>
            </tr>
        </table>
        r  r   r   rk   rj   r   r  rJ  )r`   r	  rZ   
html_tabler   r   s         r   &test_displayed_only_with_many_elementsz3TestReadHtml.test_displayed_only_with_many_elementsZ  sM    
  "(:"6~V
 Aq6A78
fh/r    z\ignore:You provided Unicode markup but also provided a value for from_encoding.*:UserWarningc                    t         j                  j                  |      }t         j                  j                  |      d   }|j	                  d      \  }}	 t        |d      5 } ||j                         |d      j                         }d d d        t        |d      5 } |t        |j                               |d      j                         }	d d d         |||d      j                         }
t        j                  	       t        j                  ||
       y # 1 sw Y   xY w# 1 sw Y   [xY w# t        $ r( t               rd|v sd|v rt        j                           w xY w)Nr   _rb)r   rS   1632)r  r  basenamesplitextsplitr   r   popr   r-   r.   	Exceptionr	   rC   skip)r`   r   rZ   	base_pathrootr  r   fobjfrom_stringfrom_file_likefrom_filenames              r   test_encodezTestReadHtml.test_encoder  sE   
 GG$$%78	ww	*1-jjo8	($/ 4.IIK(a#% 
 ($/ 4!1DIIK(8q"#% 
 -"Xce  !!+~>!!+}= 
   	"$8#tx'7KKM	s=   D6 #(DD6 1D*AD6 D'#D6 *D3/D6 61E'c                    |j                   j                  d      dk(  rt        j                  d        G d dt              } |d      } ||      sJ t        j
                  t        d      5   ||       d d d        y # 1 sw Y   y xY w)	NrB   rQ   zNot applicable for lxmlc                       e Zd Zd Zy)FTestReadHtml.test_parse_failure_unseekable.<locals>.UnseekableStringIOc                      yNFrc   r9  s    r   seekablezOTestReadHtml.test_parse_failure_unseekable.<locals>.UnseekableStringIO.seekable  s    r    N)__name__
__module____qualname__r'  rc   r    r   UnseekableStringIOr$    s    r    r+  z?
            <table><tr><td>spam<foobr />eggs</td></tr></table>z#passed a non-rewindable file objectr=   )keywordsgetrC   r  r   rF   rM   )r`   rZ   r+  bads       r   test_parse_failure_unseekablez*TestReadHtml.test_parse_failure_unseekable  s     $$((2f<KK12	 	 !B

  $$$]]:-RS 	"S!	" 	" 	"s   0	BBc                 `     G d d      } |d      } |d      } ||      sJ  ||      sJ y )Nc                   :    e Zd Zd	dZd
dZd Zd Zd ZdefdZ	y)9TestReadHtml.test_parse_failure_rewinds.<locals>.MockFilereturnNc                      || _         d| _        y r&  )r   at_end)r`   r   s     r   __init__zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.__init__  s     	#r    c                 H    | j                   rdn| j                  }d| _         |S )Nr_  T)r5  r   )r`   sizer   s      r   r   z>TestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.read  s    ![[rdii"r    c                     d| _         y r&  )r5  )r`   offsets     r   seekz>TestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.seek  s	    #r    c                      y)NTrc   r9  s    r   r'  zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.seekable  s    r    c                      y r#   rc   r9  s    r   __next__zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.__next__  s    r    c                     | S r#   rc   r9  s    r   __iter__zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.__iter__  s	     r    )r3  Nr#   )
r(  r)  r*  r6  r   r;  r'  r>  r   r@  rc   r    r   MockFiler2    s'    $
$( r    rA  z/<table><tr><td>spam<br />eggs</td></tr></table>z2<table><tr><td>spam<foobr />eggs</td></tr></table>rc   )r`   rZ   rA  goodr.  s        r   test_parse_failure_rewindsz'TestReadHtml.test_parse_failure_rewinds  s@    	 	2 IJKL%%%$$$r    c                     G d dt         j                        } |dddd      } |||f      } |||f      }|j                          |j                          |j                         s|j                         r#	 |j                         r|j                         r#d |j                  cxu r|j                  u sJ  J y )Nc                        e Zd Z fdZ xZS )@TestReadHtml.test_importcheck_thread_safety.<locals>.ErrorThreadc                 l    	 t         |           d | _        y # t        $ r}|| _        Y d }~y d }~ww xY wr#   )superrunerrr  )r`   rJ  	__class__s     r   rI  zDTestReadHtml.test_importcheck_thread_safety.<locals>.ErrorThread.run  s5    $GKM  $DH ! #"DHH#s    	3.3)r(  r)  r*  rI  __classcell__)rK  s   @r   ErrorThreadrF    s    $ $r    rM  r   r   r?   rR   )targetr2   )	threadingThreadstartis_aliverJ  )r`   r   rZ   rM  rT   helper_thread1helper_thread2s          r   test_importcheck_thread_safetyz+TestReadHtml.test_importcheck_thread_safety  s    
	$)** 	$ D&&2EF$,<H;O$,<H;O%%'>+B+B+D %%'>+B+B+D~))?^-?-??????r    c                      |dddd      }t        |      } ||      d   } ||      d   }t        j                  ||       y )Nr   r   r?   r@   r   )r   r-   r.   )r`   r   rZ   file_path_string	file_pathr   r   s          r   test_parse_path_objectz#TestReadHtml.test_parse_path_object  sM    #D&&+F)*	/03y)!,
c3'r    c                 x     |t        d            d   }t        dggdg      }t        j                  ||       y )Nz
            <table>
                <tr>
                    <th>A</th>
                </tr>
                <tr>
                    <td>word1<br>word2</td>
                </tr>
            </table>
        r   zword1 word2rG  rI  rJ  rK  s       r   test_parse_br_as_spacez#TestReadHtml.test_parse_br_as_space  sH    !	
  M?"3cUC
fh/r    )r*   bodyr   r`  c                 n   d}g dg dg dg dg dg dd}|d	   }|d
   }|d   }|dk(  r|d   }|d   }|d   }n |dk(  r|d   }n|dk(  r|d   }n
|dk(  r|d   } |t        |      |      d   }t        ||g|      }	|	j                  t        j                        }	t        j                  ||	       y )Na  
          <table>
            <tr>
              <th>HTTP</th>
              <th>FTP</th>
              <th><a href="https://en.wiktionary.org/wiki/linkless">Linkless</a></th>
            </tr>
            <tr>
              <td><a href="https://en.wikipedia.org/">Wikipedia</a></td>
              <td>SURROUNDING <a href="ftp://ftp.us.debian.org/">Debian</a> TEXT</td>
              <td>Linkless</td>
            </tr>
            <tfoot>
              <tr>
                <td><a href="https://en.wikipedia.org/wiki/Page_footer">Footer</a></td>
                <td>
                  Multiple <a href="1">links:</a> <a href="2">Only first captured.</a>
                </td>
              </tr>
            </tfoot>
          </table>
          )HTTPFTPLinkless))r^  N)r_  N)r`  z'https://en.wiktionary.org/wiki/linkless)	WikipediaSURROUNDING Debian TEXTr`  ))ra  zhttps://en.wikipedia.org/)rb  zftp://ftp.us.debian.org/)r`  N)Footer$Multiple links: Only first captured.N))rc  z)https://en.wikipedia.org/wiki/Page_footer)rd  1N)head_ignorehead_extractbody_ignorebody_extractfooter_ignorefooter_extractrh  rj  rf  r*   ri  rk  rg  r\  r`  r   extract_linksr   rp   )r   r   fillnaru   r   r-   r.   )
r`   r  rZ   gh_13141_datagh_13141_expecteddata_expfoot_exphead_expr   r   s
             r   test_extract_linkszTestReadHtml.test_extract_links  s    0 7
 P

%
2 %]3$_5$]3%<(8H()9:H(8HF](8HH_()9:HH_(8H!(="9MaPh18D??266*
fh/r    c                     d}t        j                  t        |      5  t        |d       d d d        y # 1 sw Y   y xY w)NzY`extract_links` must be one of {None, "header", "footer", "body", "all"}, got "incorrect"r=   	incorrectrl  rC   rF   rM   r   )r`   re   r4   s      r   test_extract_links_badz#TestReadHtml.test_extract_links_badC  s<    I 	 ]]:S1 	<i{;	< 	< 	<r  c                 z    d} |t        |      d      d   }t        dgg      }t        j                  ||       y )Nz
        <table>
          <tr>
            <td>
              <a href='https://google.com'>Google.com</a>
            </td>
          </tr>
        </table>
        r*   rl  r   )z
Google.comzhttps://google.comrJ  r`   rZ   r   r   r   s        r    test_extract_links_all_no_headerz-TestReadHtml.test_extract_links_all_no_headerK  sA     "(4.FqICDEF
fh/r    c                     d}t        j                  t        |      5  t        dd       d d d        y # 1 sw Y   y xY w)NzPdtype_backend numpy is invalid, only 'numpy_nullable' and 'pyarrow' are allowed.r=   testnumpyr   rw  )r`   r4   s     r   test_invalid_dtype_backendz'TestReadHtml.test_invalid_dtype_backendZ  s;    % 	 ]]:S1 	5fG4	5 	5 	5r  c                     d} |t        |            d   }t        ddgddggddg	      }t        j                  ||       y )
Na  
        <table>
            <tr>
                <th>
                    <style>.style</style>
                    A
                    </th>
                <th>B</th>
            </tr>
            <tr>
                <td>A1</td>
                <td>B1</td>
            </tr>
            <tr>
                <td>A2</td>
                <td>B2</td>
            </tr>
        </table>
        r   A1B1A2B2rG  rH  rI  rJ  rz  s        r   test_style_tagzTestReadHtml.test_style_tagb  sK    & "(4.1!4D$<$">c
S
fh/r    )dr(  r)  r*  ra   rC   fixturere   rh   r   r   marknetwork
single_cpur   r   slowr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r
  r  r  r  r  r#  r%  r'  r*  r-  r/  r4  r7  r:  r=  rC  rE  rL  rQ  rY  rf  rl  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  parametrizer  r  r  r  r  r  r  r  r  r  r  r   r  r  filterwarningsr!  r/  rC  rU  rY  r[  rt  rx  r{  r  r  rc   r    r   r\   r\   p   sp   @ ^^; ; ^^? ?':0x [[[[)  )  [[[[)  ) [[) )/-
-

))))))))L)
)
)
)	))) [[[[D  D
 [[[[[[@   @
 [[- - [[  [[2 2 [[0 0 [[	0 	0 [[2 2 [[2 2 [[
2 
2 [[	- 	-D
 ^^. .` [[[[  
 [[[[.  . 60@040@$2L02 [[-0 -0^ [[	$ 	$F0P02!0F0@0:0*08*->X320: [[UT5M24 34060642040.4- [[X X
&3& [["9eW%t,I345y%7IJ	
!!< [[-e}=0 >0. [[	&	8"( %D [[[[@  @.(0* [[U$GHB0 IB0H<050r    r\   )4collections.abcr   	functoolsr   r   r   r   r  pathlibr   r2  rO  urllib.errorr   r~  ru   rC   pandas.compatr	   pandas.util._test_decoratorsutil_test_decoratorstdpandasrx   r
   r   r   r   r   r   r   r   r   pandas._testing_testingr-   pandas.core.arraysr   r   pandas.io.commonr   r  r   r8   rI   rO   rW   r   
skip_if_norZ   r\   rc   r    r   <module>r     s   $  
  	  !   - ) ) 
 
 
 
 . BB
:(M@. U=2==#7z9R"STV=2==#8944I0 I0r    