
    =wg0                         d Z ddlmZ ddlmZmZ ddlmZ ddlm	Z	m
Z
  G d de      Z G d d	e      Z G d
 de      Z G d de      Z G d de      Z G d de      Z G d de      Zy)zM
This module contains helper functions for correcting typos in user queries.
    )bisect_left)heappushheapreplace)	highlight)	iteritemsxrangec                       e Zd ZdZddZd Zy)	Correctorz|
    Base class for spelling correction objects. Concrete sub-classes should
    implement the ``_suggestions`` method.
    c                     | j                   }g } ||||      D ]2  }t        |      |k  rt        ||       ||d   kD  s't        ||       4 t	        |d       }|D 	
cg c]  \  }	}
|
	 c}
}	S c c}
}	w )aR  
        :param text: the text to check. This word will **not** be added to the
            suggestions, even if it appears in the word graph.
        :param limit: only return up to this many suggestions. If there are not
            enough terms in the field within ``maxdist`` of the given word, the
            returned list will be shorter than this number.
        :param maxdist: the largest edit distance from the given word to look
            at. Values higher than 2 are not very effective or efficient.
        :param prefix: require suggestions to share a prefix of this length
            with the given word. This is often justifiable since most
            misspellings do not involve the first letter of the word. Using a
            prefix dramatically decreases the time it takes to generate the
            list of words.
        r   c                     d| d   z
  | d   fS )Nr       )xs    F/var/www/horilla/myenv/lib/python3.12/site-packages/whoosh/spelling.py<lambda>z#Corrector.suggest.<locals>.<lambda>I   s    1qt8QqT*:     )key)_suggestionslenr   r   sorted)selftextlimitmaxdistprefixr   heapitemsugs_sugs              r   suggestzCorrector.suggest/   s~      (( w7 	(D4y5 t$QD$'	( d :;"&'3'''s   !A1c                     t         )a)  
        Low-level method that yields a series of (score, "suggestion")
        tuples.

        :param text: the text to check.
        :param maxdist: the maximum edit distance.
        :param prefix: require suggestions to share a prefix of this length
            with the given word.
        NotImplementedError)r   r   r   r   s       r   r   zCorrector._suggestionsL   s
     "!r   N)      r   )__name__
__module____qualname____doc__r!   r   r   r   r   r
   r
   )   s    
(:"r   r
   c                       e Zd ZdZd Zd Zy)ReaderCorrectorz
    Suggests corrections based on the content of a field in a reader.

    Ranks suggestions by the edit distance, then by highest to lowest
    frequency.
    c                 .    || _         || _        || _        y N)reader	fieldnamefieldobj)r   r/   r0   r1   s       r   __init__zReaderCorrector.__init__b   s    " r   c              #   
  K   | j                   }|j                  }| j                  }|j                  |   }|j	                  |      }|j                  ||||      D ]#  }	 |||	      xs d}
d|d|
z  dz  z   z
  }||	f % y w)N)r   r   r   g      ?g      ?)r/   	frequencyr0   schemaspelling_fieldnameterms_within)r   r   r   r   r/   freqr0   r1   sugfieldr    fscores               r   r   zReaderCorrector._suggestionsg   s     NN	==+..y9&&xwv&N 	CY$)AC!GcM23E#,		s   BBNr'   r(   r)   r*   r2   r   r   r   r   r,   r,   Z   s    !
r   r,   c                   2    e Zd ZdZd Zd Z G d de      Zy)ListCorrectorzP
    Suggests corrections based on the content of a sorted list of strings.
    c                     || _         y r.   )wordlist)r   r@   s     r   r2   zListCorrector.__init__{   s	     r   c              #      K   ddl m} ddlm} t	               }t        d|dz         D ]b  } ||||      j                         }| j                  | j                        }	 |||	      D ]!  }
|
|vs|j                  |
       d|z
  |
f # d y w)Nr   )levenshtein_automaton)find_all_matchesr   )
whoosh.automata.levrB   whoosh.automata.fsarC   setr   to_dfaSkipperr@   add)r   r   r   r   rB   rC   seenidfaskr    s              r   r   zListCorrector._suggestions~   s     =8u7Q;' 	-A'gv>EEGCdmm,B'R0 -d?HHSMw;,,-	-s   A,B/Bc                       e Zd Zd Zd Zy)ListCorrector.Skipperc                      || _         d| _        y )Nr   )datarK   )r   rQ   s     r   r2   zListCorrector.Skipper.__init__   s    DIDFr   c                     | j                   | j                     |k(  r|S | xj                  dz  c_        t        | j                   || j                        }|t        | j                         k  r| j                   |   S y )Nr   )rQ   rK   r   r   )r   wposs      r   __call__zListCorrector.Skipper.__call__   s`    yy A%FFaKFdiiDFF3CS^#yy~%r   N)r'   r(   r)   r2   rU   r   r   r   rH   rO      s    		r   rH   N)r'   r(   r)   r*   r2   r   objectrH   r   r   r   r>   r>   v   s    !-& r   r>   c                       e Zd ZdZd Zd Zy)MultiCorrectorz;
    Merges suggestions from a list of sub-correctors.
    c                      || _         || _        y r.   )
correctorsop)r   rZ   r[   s      r   r2   zMultiCorrector.__init__   s    $r   c                     | j                   }i }| j                  D ]6  }|j                  |||      D ]  \  }}||v r |||   |      ||<   |||<     8 t        |      S r.   )r[   rZ   r   r   )	r   r   r   r   r[   rJ   corrr;   r    s	            r   r   zMultiCorrector._suggestions   su    WWOO 	&D"//gvF &
s$; "49e 4DI %DI	&	& r   Nr<   r   r   r   rX   rX      s    	r   rX   c                   "    e Zd ZdZd Zd Zd Zy)
Correctiona  
    Represents the corrected version of a user query string. Has the
    following attributes:

    ``query``
        The corrected :class:`whoosh.query.Query` object.
    ``string``
        The corrected user query string.
    ``original_query``
        The original :class:`whoosh.query.Query` object that was corrected.
    ``original_string``
        The original user query string.
    ``tokens``
        A list of token objects representing the corrected words.

    You can also use the :meth:`Correction.format_string` method to reformat the
    corrected query string using a :class:`whoosh.highlight.Formatter` class.
    For example, to display the corrected query string as HTML with the
    changed words emphasized::

        from whoosh import highlight

        correction = mysearcher.correct_query(q, qstring)

        hf = highlight.HtmlFormatter(classname="change")
        html = correction.format_string(hf)
    c                     || _         || _        || _        || _        | j                  r)| j	                  t        j                               | _        y d| _        y )N )original_queryqueryoriginal_stringtokensformat_stringr   NullFormatterstring)r   qqstringcorr_qre   s        r   r2   zCorrection.__init__   sK    
&,,Y-D-D-FGDKDKr   c                 h    | j                   j                  d| j                  d| j                  dS )N(z, ))	__class__r'   rc   rh   r   s    r   __repr__zCorrection.__repr__   s&    #~~66

#{{, 	,r   c                     | j                   syt        |t              r |       }t        j                  | j                   | j
                        }|j                  |d      S )a  
        Highlights the corrected words in the original query string using the
        given :class:`~whoosh.highlight.Formatter`.

        :param formatter: A :class:`whoosh.highlight.Formatter` instance.
        :return: the output of the formatter (usually a string).
        ra   T)replace)rd   
isinstancetyper   Fragmentre   format_fragment)r   	formatterfragments      r   rf   zCorrection.format_string   sS     ##i&!I%%d&:&:DKKH((4(@@r   N)r'   r(   r)   r*   r2   rq   rf   r   r   r   r_   r_      s    8	,Ar   r_   c                   "    e Zd ZdZd Zd Zd Zy)QueryCorrectorzD
    Base class for objects that correct words in a user query.
    c                     || _         y r.   r0   )r   r0   s     r   r2   zQueryCorrector.__init__   s	    "r   c                     t         )a  
        Returns a :class:`Correction` object representing the corrected
        form of the given query.

        :param q: the original :class:`whoosh.query.Query` tree to be
            corrected.
        :param qstring: the original user query. This may be None if the
            original query string is not available, in which case the
            ``Correction.string`` attribute will also be None.
        :rtype: :class:`Correction`
        r#   )r   ri   rj   s      r   correct_queryzQueryCorrector.correct_query   s
     "!r   c                     | j                   S r.   r}   rp   s    r   fieldzQueryCorrector.field	  s    ~~r   N)r'   r(   r)   r*   r2   r   r   r   r   r   r{   r{      s    #"r   r{   c                       e Zd ZdZddZd Zy)SimpleQueryCorrectora  
    A simple query corrector based on a mapping of field names to
    :class:`Corrector` objects, and a list of ``("fieldname", "text")`` tuples
    to correct. And terms in the query that appear in list of term tuples are
    corrected using the appropriate corrector.
    Nc                 d    || _         |xs i | _        t        |      | _        || _        || _        y)a  
        :param correctors: a dictionary mapping field names to
            :class:`Corrector` objects.
        :param terms: a sequence of ``("fieldname", "text")`` tuples
            representing terms to be corrected.
        :param aliases: a dictionary mapping field names in the query to
            field names for spelling suggestions.
        :param prefix: suggested replacement words must share this number of
            initial characters with the original word. Increasing this even to
            just ``1`` can dramatically speed up suggestions, and may be
            justifiable since spellling mistakes rarely involve the first
            letter of a word.
        :param maxdist: the maximum number of "edits" (insertions, deletions,
            subsitutions, or transpositions of letters) allowed between the
            original word and any suggestion. Values higher than ``2`` may be
            slow.
        N)rZ   aliases	frozensettermsetr   r   )r   rZ   termsr   r   r   s         r   r2   zSimpleQueryCorrector.__init__  s0    & %}" 'r   c                    | j                   }| j                  }| j                  }| j                  }| j                  }g }|}	|j                         D ]  }
|
j                  }|j                  ||      }||
j                  f|v s2||   }|j                  |
j                  ||      }|sX|d   }|	j                  |
j                  |
j                  |      }	|
j                  |
_        ||
_        |j                  |
        t        |||	|      S )N)r   r   r   )rZ   r   r   r   r   
all_tokensr0   getr   r!   rs   originalappendr_   )r   ri   rj   rZ   r   r   r   r   corrected_tokenscorrected_qtokenfnameanamecr   r    s                   r   r   z"SimpleQueryCorrector.correct_query.  s	   __
,,,,,, 
 
 \\^ 	3EOOEKKu-E uzz"g-u%yyFGyL q'C #."5"5eoo6;jj##GK &+ZZEN!$EJ$++E2+	3. !Wk3CDDr   )Nr   r&   )r'   r(   r)   r*   r2   r   r   r   r   r   r     s    2)Er   r   N)r*   bisectr   heapqr   r   whooshr   whoosh.compatr   r   rV   r
   r,   r>   rX   r_   r{   r   r   r   r   <module>r      su   8  '  +
." ."bi 8"I "JY .;A ;A@V 6JE> JEr   