
    =wg                        d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ dZ	 	 d2d
Z G d de      Zd Z G d de      Z G d de      ZeZ G d de      Z G d de      Z G d de      Z G d de      Z G d de      Zd Zd Zd Zd Zd  Z G d! d"e      Z G d# d$e      Z  G d% d&e      Z! G d' d(e      Z" G d) d*e      Z#d3d,Z$d-d	d+ed.fd/Z% G d0 d1e      Z&y	)4aR  The highlight module contains classes and functions for displaying short
excerpts from hit documents in the search results you present to the user, with
query terms highlighted.

The highlighting system has four main elements.

* **Fragmenters** chop up the original text into __fragments__, based on the
  locations of matched terms in the text.

* **Scorers** assign a score to each fragment, allowing the system to rank the
  best fragments by whatever criterion.

* **Order functions** control in what order the top-scoring fragments are
  presented to the user. For example, you can show the fragments in the order
  they appear in the document (FIRST) or show higher-scoring fragments first
  (SCORE)

* **Formatters** turn the fragment objects into human-readable output, such as
  an HTML string.

See :doc:`/highlight` for more information.
    )division)deque)nlargest)groupby
htmlescape)Tokeni   Nc                     ||r|d   j                   nd}||r|d   j                  n
t        |       }t        d||z
        }t	        t        |       ||z         }t        | |||      S )zfReturns a :class:`Fragment` object based on the :class:`analysis.Token`
    objects in ``tokens`.
    r   )	startcharendcharlenmaxminFragment)texttokensr   r   charsbefore
charsafters         G/var/www/horilla/myenv/lib/python3.12/site-packages/whoosh/highlight.pymkfragr   B   sp     +1F1I''q	(.&*$$CIAy;./I#d)Wz12GD&)W55    c                   6    e Zd ZdZd	dZd Zd Zd Zd Zd Z	y)
r   aX  Represents a fragment (extract) from a hit document. This object is
    mainly used to keep track of the start and end points of the fragment and
    the "matched" character ranges inside; it does not contain the text of the
    fragment or do much else.

    The useful attributes are:

    ``Fragment.text``
        The entire original text from which this fragment is taken.

    ``Fragment.matches``
        An ordered list of objects representing the matched terms in the
        fragment. These objects have ``startchar`` and ``endchar`` attributes.

    ``Fragment.startchar``
        The index of the first character in the fragment.

    ``Fragment.endchar``
        The index of the last character in the fragment.

    ``Fragment.matched_terms``
        A ``set`` of the ``text`` of the matched terms in the fragment (if
        available).
    c                     || _         || _        |dk(  rt        |      }|| _        || _        t               | _        |D ]4  }t        |d      s| j                  j                  |j                          6 y)a  
        :param text: the source text of the fragment.
        :param matches: a list of objects which have ``startchar`` and
            ``endchar`` attributes, and optionally a ``text`` attribute.
        :param startchar: the index into ``text`` at which the fragment starts.
            The default is 0.
        :param endchar: the index into ``text`` at which the fragment ends.
            The default is -1, which is interpreted as the length of ``text``.
        r   r   N)	r   matchesr   r   r   setmatched_termshasattradd)selfr   r   r   r   ts         r   __init__zFragment.__init__m   sk     	b=$iG" U 	/Aq&!""&&qvv.	/r   c                 `    d| j                   | j                  t        | j                        fz  S )Nz<Fragment %d:%d %d>)r   r   r   r   r    s    r   __repr__zFragment.__repr__   s,    $(+DLL(9(; ; 	;r   c                 4    | j                   | j                  z
  S Nr   r   r$   s    r   __len__zFragment.__len__   s    ||dnn,,r   c                     | j                   }| j                  }|j                   }|j                  }||cxk  xr |k  nc xs ||cxk  xr |k  S c S r'   r   r   r    fragmentscecfscfecs         r   overlapszFragment.overlaps   sJ    ^^\\  S212=b=1=1r   c                     | j                   }| j                  }|j                   }|j                  }t        ||      t        ||      z
  S r'   )r   r   r   r   r,   s         r   overlapped_lengthzFragment.overlapped_length   sA    ^^\\  2s|c"cl**r   c                 0    t        |       t        |      k  S r'   )id)r    others     r   __lt__zFragment.__lt__   s    $x"U)##r   N)r   r   )
__name__
__module____qualname____doc__r"   r%   r)   r2   r4   r8    r   r   r   r   S   s%    2/0;-2+$r   r   c              #   H   K   | D ]  }|j                   |v |_        |  y wr'   )r   matched)r   termsetr!   s      r   set_matched_filterrA      s*      FFg%	s    "c                       e Zd Zd Zd Zd Zy)
Fragmenterc                      y)a  Returns True if this fragmenter requires retokenized text.

        If this method returns True, the fragmenter's ``fragment_tokens``
        method  will be called with an iterator of ALL tokens from the text,
        with the tokens for matched terms having the ``matched`` attribute set
        to True.

        If this method returns False, the fragmenter's ``fragment_matches``
        method will be called with a LIST of matching tokens.
        Tr=   r$   s    r   must_retokenizezFragmenter.must_retokenize   s     r   c                     t         )zYields :class:`Fragment` objects based on the tokenized text.

        :param text: the string being highlighted.
        :param all_tokens: an iterator of :class:`analysis.Token`
            objects from the string.
        NotImplementedError)r    r   
all_tokenss      r   fragment_tokenszFragmenter.fragment_tokens   s
     "!r   c                     t         )a  Yields :class:`Fragment` objects based on the text and the matched
        terms.

        :param text: the string being highlighted.
        :param matched_tokens: a list of :class:`analysis.Token` objects
            representing the term matches in the string.
        rG   )r    r   matched_tokenss      r   fragment_matcheszFragmenter.fragment_matches   s
     "!r   N)r9   r:   r;   rE   rJ   rM   r=   r   r   rC   rC      s    "	"r   rC   c                        e Zd ZdZefdZd Zy)WholeFragmentera  Doesn't fragment the token stream. This object just returns the entire
    entire stream as one "fragment". This is useful if you want to highlight
    the entire text.

    Note that even if you use the `WholeFragmenter`, the highlight code will
    return no fragment if no terms matched in the given field. To return the
    whole fragment even in that case, call `highlights()` with `minscore=0`::

        # Query where no terms match in the "text" field
        q = query.Term("tag", "new")

        r = mysearcher.search(q)
        r.fragmenter = highlight.WholeFragmenter()
        r.formatter = highlight.UppercaseFormatter()
        # Since no terms in the "text" field matched, we get no fragments back
        assert r[0].highlights("text") == ""

        # If we lower the minimum score to 0, we get a fragment even though it
        # has no matching terms
        assert r[0].highlights("text", minscore=0) == "This is the text field."

    c                     || _         y r'   )	charlimit)r    rQ   s     r   r"   zWholeFragmenter.__init__   s	    "r   c                     | j                   }g }|D ]A  }|r|j                  |kD  r n.|j                  s#|j                  |j	                                C t        ||      gS r'   )rQ   r   r?   appendcopyr   )r    r   r   rQ   r   r!   s         r   rJ   zWholeFragmenter.fragment_tokens   s[    NN	 	)AQYY2yyqvvx(		)
 w'((r   Nr9   r:   r;   r<   DEFAULT_CHARLIMITr"   rJ   r=   r   r   rO   rO      s    . "3 #)r   rO   c                   $    e Zd ZdZddefdZd Zy)SentenceFragmentera|  Breaks the text up on sentence end punctuation characters
    (".", "!", or "?"). This object works by looking in the original text for a
    sentence end as the next character after each token's 'endchar'.

    When highlighting with this fragmenter, you should use an analyzer that
    does NOT remove stop words, for example::

        sa = StandardAnalyzer(stoplist=None)
       z.!?c                 @    || _         t        |      | _        || _        y)zf
        :param maxchars: The maximum number of characters allowed in a
            fragment.
        N)maxchars	frozensetsentencecharsrQ   )r    r[   r]   rQ   s       r   r"   zSentenceFragmenter.__init__  s     !&}5"r   c              #     K   | j                   }| j                  }| j                  }t        |      }d }g }d }	d}
|D ]  }|j                  }|j
                  }	|r|	|kD  r n|||}d}
|	|z
  }|
|z  }
|j                  r|j                  |j                                |	|k  se||	   |v sm|	dz   |k  r||	dz      |v r|r|
|k  rt        ||||	       g }d }d}
 |rt        ||||	       y y w)Nr      r+   )
r[   r]   rQ   r   r   r   r?   rS   rT   r   )r    r   r   r[   r]   rQ   textlenfirsttksr   
currentlenr!   r   tlengths                 r   rJ   z"SentenceFragmenter.fragment_tokens  s&    ==**NN	d)
 	AIiiGWy0}!
	)G'!Jyy

1668$  T']m%CQ;(T'A+->--O :1 seWMM
?	F seWEE s   BC1!C1)AC1NrU   r=   r   r   rX   rX      s     !$5,	#2Fr   rX   c                   $    e Zd ZdZddefdZd Zy)ContextFragmenterzTLooks for matched terms and aggregates them with their surrounding
    context.
    rY      c                 .    || _         || _        || _        y)a   
        :param maxchars: The maximum number of characters allowed in a
            fragment.
        :param surround: The number of extra characters of context to add both
            before the first matched term and after the last matched term.
        N)r[   surroundrQ   )r    r[   ri   rQ   s       r   r"   zContextFragmenter.__init__G  s     ! "r   c              #     K   | j                   }| j                  }| j                  }d }t               }d}g }	d }
d}|D ]  }|j                  }|j
                  }
|
|z
  }|r|
|kD  r n|dk  rI|j                  s=|j                  |       |rs|
|d   z
  |kD  rh|j                          |rV|
|d   z
  |kD  rnI||z   |kD  rd}n>|j                  r2|}||r|d   }n|}||z  }|	j                  |j                                |dk\  s||z  }||z  }|dk  st        ||	||
       g }	t               }d }d} |	rt        ||	||
       y y w)Nr   r   r+   )r[   ri   rQ   r   r   r   r?   rS   popleftrT   r   )r    r   r   r[   ri   rQ   ra   firsts	countdownrb   r   rc   r!   r   rd   s                  r   rJ   z!ContextFragmenter.fragment_tokensS  s    ====NN	  	
 -	#AIiiG	)GWy01}QYY i(6!9!4x!?NN$ 6!9!4x!?g%0 	$	= &q	 )!X-	

1668$ A~g%
W$	 > seWMMC"WF E!"J[-	#` seWEE s   B0E3AEE8ENrU   r=   r   r   rf   rf   B  s     !$b<M 
#DFr   rf   c                   B    e Zd ZdZdddefdZd Zd Zed        Z	d	 Z
y
)PinpointFragmenterzlThis is a NON-RETOKENIZING fragmenter. It builds fragments from the
    positions of the matched terms.
    rY   rg   Fc                 <    || _         || _        || _        || _        y)a,  
        :param maxchars: The maximum number of characters allowed in a
            fragment.
        :param surround: The number of extra characters of context to add both
            before the first matched term and after the last matched term.
        :param autotrim: automatically trims text before the first space and
            after the last space in the fragments, to try to avoid truncated
            words at the start and end. For short fragments or fragments with
            long runs between spaces this may give strange results.
        N)r[   ri   autotrimrQ   )r    r[   ri   rq   rQ   s        r   r"   zPinpointFragmenter.__init__  s      !  "r   c                      y)NFr=   r$   s    r   rE   z"PinpointFragmenter.must_retokenize  s    r   c                 h    |D cg c]  }|j                   s| }}| j                  ||      S c c}w r'   )r?   rM   )r    r   r   r!   r?   s        r   rJ   z"PinpointFragmenter.fragment_tokens  s3    $2		122$$T733 3s   //c                 z   | j                   }| j                  }| j                  }|j                  d||      }|dkD  r|dz   }|j	                  d||      }|dkD  r|}| j
                  rFt        || j
                  d   j                        }t        || j
                  d   j                        }|| _        || _        y )N r   r_   r   )r   r   r   findrfindr   r   r   )r-   r   r   r   
firstspace	lastspaces         r   	_autotrimzPinpointFragmenter._autotrim  s    }}&&	""YYsIw7
>"QIJJsIw7	q=GIx'7'7':'D'DEI'8#3#3B#7#?#?@G&"r   c              #   n  K   | j                   }| j                  }| j                  }| j                  }d}t	        |      D ]  \  }}	||k\  r|}|	j
                  }
|	j                  }|r||kD  r y ||
z
  }|t        |      dz
  k  r[||k  rV||dz      }|j                  }||z
  |k  r"||
z
  |k  r|dz  }|}|||j
                  z
  z  }nn|t        |      dz
  k  r||k  rVt        d|
|z
        }
t        t        |      ||z         }t        ||||dz    |
|      }|r| j                  |       |  y w)Nr   r_   r   )r[   ri   rq   rQ   	enumerater   r   r   r   r   r   rz   )r    r   r   r[   ri   rq   rQ   jir!   leftrightrc   nextr/   r-   s                   r   rM   z#PinpointFragmenter.fragment_matches  sT    ======NN	f% 	DAqAvA;;DIIEUY.Jc&kAo%*x*?a!e}\\:)b4i8.CFAE2#67J c&kAo%*x*? q$/*DD	58#34EfQq1uotUCHx(N3	s   CD5 AD5N)r9   r:   r;   r<   rV   r"   rE   rJ   staticmethodrz   rM   r=   r   r   ro   ro     s;     !$b5,#$4 # #&!r   ro   c                       e Zd Zy)FragmentScorerN)r9   r:   r;   r=   r   r   r   r     s    r   r   c                       e Zd Zd Zy)BasicFragmentScorerc                 |    t        d |j                  D              }|t        |j                        dz  xs dz  }|S )Nc              3   4   K   | ]  }|j                     y wr'   )boost).0r!   s     r   	<genexpr>z/BasicFragmentScorer.__call__.<locals>.<genexpr>  s     /AGG/s   d   r_   )sumr   r   r   )r    fscores      r   __call__zBasicFragmentScorer.__call__  s:    /QYY// 	#aoo&,22r   N)r9   r:   r;   r   r=   r   r   r   r     s    r   r   c                      y)z#Sorts higher scored passages first.r_   r=   r-   s    r   SCOREr     s    r   c                     | j                   S )z2Sorts passages from earlier in the document first.r   r   s    r   FIRSTr   	  s    r   c                     dt        |       z
  S )zSorts longer passages first.r   r   r   s    r   LONGERr     s    s8}r   c                     t        |       S )zSort shorter passages first.r   r   s    r   SHORTERr     s    x=r   c                 P    |r|j                   S | |j                  |j                   S )a   Convenience function for getting the text to use for a match when
    formatting.

    If ``replace`` is False, returns the part of ``original`` between
    ``token.startchar`` and ``token.endchar``. If ``replace`` is True, returns
    ``token.text``.
    )r   r   r   )originaltokenreplaces      r   get_textr     s&     zz66r   c                   8    e Zd ZdZdZd Zd	dZd	dZd	dZd Z	y)
	Formattera  Base class for formatters.

    For highlighters that return strings, it is usually only necessary to
    override :meth:`Formatter.format_token`.

    Use the :func:`get_text` function as a convenience to get the token text::

        class MyFormatter(Formatter):
            def format_token(text, token, replace=False):
                ttext = get_text(text, token, replace)
                return "[%s]" % ttext
    ...c                     |S r'   r=   r    r   s     r   _textzFormatter._text9  s    r   c                     t         )am  Returns a formatted version of the given "token" object, which
        should have at least ``startchar`` and ``endchar`` attributes, and
        a ``text`` attribute if ``replace`` is True.

        :param text: the original fragment text being highlighted.
        :param token: an object having ``startchar`` and ``endchar`` attributes
            and optionally a ``text`` attribute (if ``replace`` is True).
        :param replace: if True, the original text between the token's
            ``startchar`` and ``endchar`` indices will be replaced with the
            value of the token's ``text`` attribute.
        rG   r    r   r   r   s       r   format_tokenzFormatter.format_token<  s
     "!r   c                    g }|j                   }|j                  }|j                  D ]  }|j                   |j                   |k  r |j                   |kD  r-|j                  | j	                  |||j                                 |j                  | j                  |||             |j                  } |j                  | j	                  |||j                                dj                  |      }|S )a  Returns a formatted version of the given text, using the "token"
        objects in the given :class:`Fragment`.

        :param fragment: a :class:`Fragment` object representing a list of
            matches in the text.
        :param replace: if True, the original text corresponding to each
            match will be replaced with the value of the token object's
            ``text`` attribute.
         )r   r   r   rS   r   r   r   join)r    r-   r   outputindexr   r!   
out_strings           r   format_fragmentzFormatter.format_fragmentK  s     ""}}!! 	A{{"{{U"{{U"djjeAKK)@ABMM$++D!W=>IIE	 	djjeH,<,<!=>?WWV_
r   c                     |D cg c]  }| j                  ||       }}| j                  j                  |      S c c}w )zjReturns a formatted version of the given text, using a list of
        :class:`Fragment` objects.
        r   )r   betweenr   )r    	fragmentsr   r   	formatteds        r   formatzFormatter.formath  sK     () ))!W)= )	 )||  ++)s   <c                 $    | j                  |      S r'   )r   )r    r   r   s      r   r   zFormatter.__call__q  s    {{9%%r   NF)
r9   r:   r;   r<   r   r   r   r   r   r   r=   r   r   r   r   )  s'     G":,&r   r   c                       e Zd ZdZddZy)NullFormatterz/Formatter that does not modify the string.
    c                     t        |||      S r'   )r   r   s       r   r   zNullFormatter.format_tokenz  s    eW--r   Nr   )r9   r:   r;   r<   r   r=   r   r   r   r   v  s    .r   r   c                        e Zd ZdZddZddZy)UppercaseFormatterzBReturns a string in which the matched terms are in UPPERCASE.
    c                     || _         y)zD
        :param between: the text to add between fragments.
        N)r   )r    r   s     r   r"   zUppercaseFormatter.__init__  s    
 r   c                 <    t        |||      }|j                         S r'   )r   upper)r    r   r   r   ttxts        r   r   zUppercaseFormatter.format_token  s    eW-zz|r   N)r   r   )r9   r:   r;   r<   r"   r   r=   r   r   r   r   ~  s    r   r   c                   6    e Zd ZdZdZ	 	 	 ddZd Zd	dZd Zy)
HtmlFormattera:  Returns a string containing HTML formatting around the matched terms.

    This formatter wraps matched terms in an HTML element with two class names.
    The first class name (set with the constructor argument ``classname``) is
    the same for each match. The second class name (set with the constructor
    argument ``termclass`` is different depending on which term matched. This
    allows you to give different formatting (for example, different background
    colors) to the different terms in the excerpt.

    >>> hf = HtmlFormatter(tagname="span", classname="match", termclass="term")
    >>> hf(mytext, myfragments)
    "The <span class="match term0">template</span> <span class="match term1">geometry</span> is..."

    This object maintains a dictionary mapping terms to HTML class names (e.g.
    ``term0`` and ``term1`` above), so that multiple excerpts will use the same
    class for the same term. If you want to re-use the same HtmlFormatter
    object with different searches, you should call HtmlFormatter.clear()
    between searches to clear the mapping.
    z6<%(tag)s class=%(q)s%(cls)s%(tn)s%(q)s>%(t)s</%(tag)s>c                     || _         || _        || _        || _        || _        || _        i | _        dj                  | j                  | j                  f      | _        y)a  
        :param tagname: the tag to wrap around matching terms.
        :param between: the text to add between fragments.
        :param classname: the class name to add to the elements wrapped around
            matching terms.
        :param termclass: the class name prefix for the second class which is
            different for each matched term.
        :param maxclasses: the maximum number of term classes to produce. This
            limits the number of classes you have to define in CSS by recycling
            term class names. For example, if you set maxclasses to 3 and have
            5 terms, the 5 terms will use the CSS classes ``term0``, ``term1``,
            ``term2``, ``term0``, ``term1``.
        ru   N)	r   tagname	classname	termclass	attrquote
maxclassesseenr   	htmlclass)r    r   r   r   r   r   r   s          r   r"   zHtmlFormatter.__init__  sR    " """$	4>>4>>"BCr   c                     t        |d      S )NF)quoter   r   s     r   r   zHtmlFormatter._text  s    $e,,r   c                    | j                   }| j                  t        |||            }||v r||   }nt        |      | j                  z  }|||<   | j
                  | j                  | j                  | j                  ||dz  S )N)tagqclsr!   tn)	r   r   r   r   r   templater   r   r   )r    r   r   r   r   ttexttermnums          r   r   zHtmlFormatter.format_token  s{    yy

8D%9:D=5kG$i$//1G!DK}}t||$..'+~~E&- / / 	/r   c                     i | _         y)z@Clears the dictionary mapping terms to HTML classnames.
        N)r   r$   s    r   cleanzHtmlFormatter.clean  s     	r   N)strongr   matchterm   "r   )	r9   r:   r;   r<   r   r"   r   r   r   r=   r   r   r   r     s-    ( HH16ABD4-/r   r   c                   6    e Zd ZdZddZd Zd	dZd	dZd	dZy)
GenshiFormatterz[Returns a Genshi event stream containing HTML formatting around the
    matched terms.
    c                     || _         || _        ddlm}m}m} ddlm}m} |||c| _        | _        | _        ||c| _        | _        y)z
        :param qname: the QName for the tag to wrap around matched terms.
        :param between: the text to add between fragments.
        r   )STARTENDTEXT)AttrsStreamN)qnamer   genshi.corer   r   r   r   r   )r    r   r   r   r   r   r   r   s           r   r"   zGenshiFormatter.__init__  sA     
00-*/d'
DHdi"'
DKr   c                     |r7|d   d   | j                   k(  r"| j                   |d   d   |z   |d   d   f|d<   y |j                  | j                   |df       y )Nr   r   r_      Nr   r   )r   rS   )r    r   r   s      r   	_add_textzGenshiFormatter._add_text  sX    fRjmtyy0))VBZ]T%96":a=IF2JMM499dN;<r   c                     | j                   }t        |||      }| j                  | j                  || j	                         fdf| j
                  |df| j                  |dfg      S )Nr   )r   r   r   r   r   r   r   )r    r   r   r   qntxts         r   r   zGenshiFormatter.format_token  sc    ZZtUG,{{TZZ"djjl);^L!YY^<!XXr>:< = 	=r   c                 \   g }|j                   }|j                  }|j                  D ]P  }|j                   |kD  r| j                  |||j                    |       |j	                  |||f       |j
                  }R |t        |      k  r| j                  ||d  |       | j                  |      S r'   )r   r   r   r   rS   r   r   r   )r    r-   r   r   r   r   r!   s          r   r   zGenshiFormatter.format_fragment  s    ""}}!! 	A{{U"tE!++6?MM4G,-IIE		
 3t9NN4<0{{6""r   c                     g }d}|D ]8  }|s| j                  | j                  |       || j                  ||      z  }d}: | j                  |      S )NTr   F)r   r   r   r   )r    r   r   r   ra   r-   s         r   r   zGenshiFormatter.format  s`    ! 	Ht||V4d**8W*EEFE		
 {{6""r   N)r   r   r   )	r9   r:   r;   r<   r"   r   r   r   r   r=   r   r   r   r     s     0==##r   r   r_   c                     fd| D        }t        ||      }|D cg c]  \  }}||k\  s| }}}|j                  |       |S c c}}w )Nc              3   2   K   | ]  } |      |f  y wr'   r=   )r   r   scorers     r   r   z top_fragments.<locals>.<genexpr>  s     :1A:   key)r   sort)	r   countr   orderminscorescored_fragmentsr   sfbest_fragmentss	     `      r   top_fragmentsr     sW    :	:'78*:PYUBex>ObPNPE" Qs
   AA   queryc
                 \   |
t               }t        |      t        u r |       }t        |      t        u r |       }t        |      t        u r |       }|
t               }t        |      }
 || d|	d      }t        ||
      }|j	                  | |      }t        |||||      } || |      S )NTF)charsmoderemovestops)r   typer\   rA   rJ   r   )r   termsanalyzer
fragmenter	formattertopr   r   r   r  r@   r   r   s                r   	highlightr
    s     ~$&J4\
I$K	F|t~$&Gd$TuEF0F**48IifeXFIT9%%r   c                   L    e Zd ZddddefdZd Zed        Zed        ZddZ	y)	HighlighterNFc                     |xs
 t               | _        |xs
 t               | _        |xs t	        d      | _        || _        || _        y )Nb)r   )rf   r  r   r   r   r  r   always_retokenize)r    r  r   r  r  r   s         r   r"   zHighlighter.__init__1  sB    $;(9(;5 3 5"@mC&@
!2r   c                     | j                   ry|j                         sy| j                  j                         ry|j                  j
                  |   }|j                  d      S )NF
characters)r  has_matched_termsr  rE   searcherschemasupports)r    results	fieldnamefields       r   can_load_charszHighlighter.can_load_chars9  sY    
 !!((*??**,   ''	2~~l++r   c                    i x| j                   |<   }t        d | j                  D              }|D ]  }i ||<   	 |D ]  } ||      }| j                  j	                  ||      }	t        | j                  ||f         }
|D ]D  }||
v s|	j                  |       |	j                         |k(  sJ |	j                  d      ||   |<   F  y )Nc              3   &   K   | ]	  \  }}|  y wr'   r=   )r   _docnums      r   r   z*Highlighter._load_chars.<locals>.<genexpr>R  s     Byq&FBs   r  )
_char_cachesortedtop_nr  postingsr   termdocsskip_tor6   value_as)r  r  textsto_bytescache
sorted_idsr  r   btextmdocsets              r   _load_charszHighlighter._load_charsL  s    
 243I&BGMMBB
  	FE&M	  	CDTNE  )))U;A))9e*<=>F$ CV#IIf%446V+++*+**\*BE&M$'	C		Cr   c              #     K   d }| D ]  }|j                   s|| d }| ||j                         }/|j                  |j                  k  re|j                  |j                  kD  sb|xj                  |j                  |j                  |j                  z
  d  z  c_        |j                  |_        | d }|  || y y wr'   )r?   rT   r   r   r   )r   r   r!   s      r   _merge_matched_tokensz!Highlighter._merge_matched_tokensa  s     
  	A99$K E}-99u}},JJ!&&qyy)@)A"BBJ$%IIEM%	( K s   A&C)ACc           
         |j                   }|j                  j                  }|   }|j                  }	|j                  ||vrt        dz        |   }|j                         rfd|j                         D        }
n|j                  d      }
t        fd|
D              }| j                  |      r	|j                  vr| j                  |||	       fd|j                         D        }|j                     |j                     }g }| j                  j                  }|D ]9  }||   }|D ]-  \  }}}|r||kD  r |j!                  t#        ||||             / ; |j%                  d 	       t'        |d
       D cg c]  \  }}t)        |d 	       }}}| j                  j+                  ||      }ni|j                  j                     j,                  } ||dddd      }t/        ||      }| j1                  |      }| j                  j3                  ||      }t5        ||| j6                  | j8                  |      }| j:                  j=                  |      }|S c c}}w )NzField %r is not stored.c              3   2   K   | ]  }|d    k(  r|  yw)r   Nr=   )r   r   r  s     r   r   z,Highlighter.highlight_hit.<locals>.<genexpr>  s$      /taI-  /r   T)expandr  c              3   4   K   | ]  } |d            yw)r_   Nr=   )r   r   
from_bytess     r   r   z,Highlighter.highlight_hit.<locals>.<genexpr>  s     A$*T!W-As   c              3   D   K   | ]  }|d    k(  r |d           yw)r   r_   Nr=   )r   r   r  r3  s     r   r   z,Highlighter.highlight_hit.<locals>.<genexpr>  s-      17i/ #47+ 1s    )r   posr   r   c                     | j                   S r'   r   r!   s    r   <lambda>z+Highlighter.highlight_hit.<locals>.<lambda>  s
    akk r   r   c                     | j                   S r'   r   r7  s    r   r8  z+Highlighter.highlight_hit.<locals>.<lambda>  s
    !++ r   c                 4    | j                   | j                  z
  S r'   r(   r7  s    r   r8  z+Highlighter.highlight_hit.<locals>.<lambda>  s    qyy1;;/F r   r   F)	positionsr  r  r  )r   )r  r  r  r&  r3  KeyErrorr  r   query_termsr\   r  r  r,  r  r  rQ   rS   r	   r   r   r   rM   r  rA   r.  rJ   r   r   r   r  r   )r    hitobjr  r   r	  r   r  r  r  r&  btermswordshittermscmapr   rQ   wordr  r5  r   r   r   groupr   r  r   r3  s     `                       @r   highlight_hitzHighlighter.highlight_hit  s   ..!!((y!>>%%
<&89DEE)$D $$&/w'<'<'> /F ((	(JFA&AA w	2 3 33  )UHE18L8L8N 1H &&y1&--@DF11I  OT
/4 O+CG Wy%8MM%Ts2;W#N OOO KK1K2(/8M(NP$#u %%FG PF P88vFI ''..y9BBHdd$W*/1F (6F//7F77fEI!)S$++tzz+35	&&y1!Ps   I2)Nr   r_   )
r9   r:   r;   r   r"   r  r   r,  r.  rE  r=   r   r   r  r  0  sG    "&tt#(3,& C C(  :;r   r  )NNr   r   )r_   )'r<   
__future__r   collectionsr   heapqr   	itertoolsr   whoosh.compatr   whoosh.analysisr	   rV   r   objectr   rA   rC   rO   NullFragmeterrX   rf   ro   r   r   r   r   r   r   r   r   r   r   r   r   r   r
  r  r=   r   r   <module>rN     s5  8.      $ !  
 26%&6"H$v H$Z"" ""J#)j #)N  HF HFVUF
 UFpS Sp	V 		. 	


7J& J&Z.I .  DI DN6#i 6#v ABAU&0J& Jr   