
    =wgBA                        d Z ddlmZ ddlmZmZ ddlmZmZm	Z	m
Z
 ddlmZ ddlmZmZ ddlmZmZmZmZ  G d d	e      Zd
 Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Zy)z
The classes in this module encode and decode posting information for a field.
The field format essentially determines what information is stored about each
occurance of a term.
    )defaultdict)	unstoppedentoken)	iteritemsdumpsloadsb
emptybytes)	_INT_SIZE_FLOAT_SIZE)	pack_uintunpack_uint
pack_floatunpack_floatc                   \    e Zd ZdZdZdZ ee      ZddZ	d Z
d Zd Zd	 Zd
 Zd Zd Zy)Formata  Abstract base class representing a storage format for a field or vector.
    Format objects are responsible for writing and reading the low-level
    representation of a field. It controls what kind/level of information to
    store about the indexed fields.
    Tfield_boostc                      || _         || _        y)z
        :param field_boost: A constant boost factor to scale to the score
            of all queries matching terms in this field.
        Nr   optionsselfr   r   s      E/var/www/horilla/myenv/lib/python3.12/site-packages/whoosh/formats.py__init__zFormat.__init__8   s     '    c                 p    |xr3 | j                   |j                   u xr | j                  |j                  k(  S N)	__class____dict__)r   others     r   __eq__zFormat.__eq__A   s4     4NNeoo54MMU^^3	5r   c                 N    | j                   j                  d| j                  dS )Nz(boost=))r!   __name__r   r   s    r   __repr__zFormat.__repr__F   s    !%!8!8$:J:JKKr   c                 :    | j                   dk  ry | j                   S Nr   )posting_sizer(   s    r   fixed_value_sizezFormat.fixed_value_sizeI   s    q    r   c                     t         )a  Takes the text value to be indexed and yields a series of
        ("tokentext", frequency, weight, valuestring) tuples, where frequency
        is the number of times "tokentext" appeared in the value, weight is the
        weight (a float usually equal to frequency in the absence of per-term
        boosts) and valuestring is encoded field-specific posting value for the
        token. For example, in a Frequency format, the value string would be
        the same as frequency; in a Positions format, the value string would
        encode a list of token positions at which "tokentext" occured.

        :param value: The unicode text to index.
        :param analyzer: The analyzer to use to process the text.
        )NotImplementedError)r   valueanalyzerkwargss       r   word_valueszFormat.word_valuesN   s
     "!r   c                      t        | d|z         S )zReturns True if this format supports interpreting its posting
        value as 'name' (e.g. "frequency" or "positions").
        decode_)hasattrr   names     r   supportszFormat.supports^   s     tY-..r   c                      t        | d|z         S )zReturns the bound method for interpreting value as 'name',
        where 'name' is for example "frequency" or "positions". This
        object must have a corresponding Format.decode_<name>() method.
        r5   )getattrr7   s     r   decoderzFormat.decoderd   s    
 tY-..r   c                 0     | j                  |      |      S )zInterprets the encoded value string as 'astype', where 'astype' is
        for example "frequency" or "positions". This object must have a
        corresponding decode_<astype>() method.
        )r<   )r   astypevaluestrings      r   	decode_aszFormat.decode_ask   s    
 $t||F#K00r   N      ?)r'   
__module____qualname____doc__r,   textualdictfloat__inittypes__r   r$   r)   r-   r3   r9   r<   r@    r   r   r   r   -   sF     LGU+M5
L!
" //1r   r   c                     t        | t        t        f      rt        | fi |}t	        |      S  || fi |}t	        |      S r    )
isinstancetuplelistr   r   )r0   r1   r2   gens       r   tokensrP   y   sF    %%'e&v& S> u''S>r   c                   L    e Zd ZdZdZ ee      ZddZd Z	d Z
d Zd Zd	 Zy
)	Existencea  Only indexes whether a given term occurred in a given document; it does
    not store frequencies or positions. This is useful for fields that should
    be searchable but not scorable, such as file path.

    Supports: frequency, weight (always reports frequency = 1).
    r   r   c                      || _         || _        y r    r   r   s      r   r   zExistence.__init__   s    &r   c                 l    | j                   t        d t        |||      D              }fd|D        S )Nc              3   4   K   | ]  }|j                     y wr    )text).0ts     r   	<genexpr>z(Existence.word_values.<locals>.<genexpr>   s     FaffFs   c              3   2   K   | ]  }|d t         f  yw   Nr
   )rW   wfbs     r   rY   z(Existence.word_values.<locals>.<genexpr>   s     81Ar:&8s   )r   setrP   )r   r0   r1   r2   wordsetr^   s        @r   r3   zExistence.word_values   s2    FfUHf&EFF888r   c                     t         S r    r
   )r   r0   s     r   encodezExistence.encode       r   c                      yNr\   rJ   r   r?   s     r   decode_frequencyzExistence.decode_frequency   s    r   c                     | j                   S r    r   rf   s     r   decode_weightzExistence.decode_weight   s    r   c                     t         S r    r
   r   vss     r   combinezExistence.combine   rc   r   NrA   )r'   rC   rD   rE   r,   rG   rH   rI   r   r3   rb   rg   ri   rm   rJ   r   r   rR   rR      s6     LU+M9
 r   rR   c                   H    e Zd ZdZeZ eee      Z	d	dZ
d Zd Zd Zd Zy)
	FrequencyzUStores frequency information for each posting.

    Supports: frequency, weight.
    )r   boost_as_freqc                 D    t        |t              sJ || _        || _        y)z
        :param field_boost: A constant boost factor to scale to the score of
            all queries matching terms in this field.
        N)rL   rH   r   r   )r   r   rp   r   s       r   r   zFrequency.__init__   s#     +u---&r   c                 4  	 | j                   d}t        t              }t        t              	d|d<   t	        |||      D ]?  }|dz  }||j
                  xx   dz  cc<   	|j
                  xx   |j                  z  cc<   A 	fdt        |      D        }|S )Nr   Tboostsr\   c              3   N   K   | ]  \  }}|||   z  t        |      f  y wr    )r   )rW   r]   freqr^   weightss      r   rY   z(Frequency.word_values.<locals>.<genexpr>   s.      $wq$4b)D/: $s   "%)r   r   intrH   rP   rV   boostr   )
r   r0   r1   r2   lengthfreqsrX   wvsr^   rv   s
           @@r   r3   zFrequency.word_values   s    C e$xx0 	'AaKF!&&MQMAFFOqww&O	'
$E"$
r   c                     t        |      d   S r+   )r   rf   s     r   rg   zFrequency.decode_frequency   s    ;'**r   c                 <    t        |      d   }|| j                  z  S r+   )r   r   )r   r?   ru   s      r   ri   zFrequency.decode_weight   s"    ;'*d&&&&r   c                 >     t        t         fd|D                    S )Nc              3   @   K   | ]  }j                  |        y wr    )decode_value)rW   vr   s     r   rY   z$Frequency.combine.<locals>.<genexpr>   s     >aT..q1>s   )r   sumrk   s   ` r   rm   zFrequency.combine   s    >2>>??r   N)rB   F)r'   rC   rD   rE   r   r,   rG   rH   boolrI   r   r3   rg   ri   rm   rJ   r   r   ro   ro      s4    
 LU$?M	 +'@r   ro   c                   :    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
y	)
	PositionszStores position information in each posting, to allow phrase searching
    and "near" queries.

    Supports: frequency, weight, positions, position_boosts (always reports
    position boost = 1.0).
    c              +     K   | j                   }t        t              }t        t              }d|d<   d|d<   t	        |||      D ]K  }||j
                     j                  |j                         ||j
                  xx   |j                  z  cc<   M t        |      D ]-  \  }}	| j                  |	      }|t        |	      ||   |z  |f / y w)NT	positionsrs   )r   r   rN   rH   rP   rV   appendposrx   r   rb   len)
r   r0   r1   r2   r^   posesrv   rX   r]   poslists
             r   r3   zPositions.word_values   s     D!e$"{xx0 	'A!&&M  'AFFOqww&O	' $E* 	<JAwKK(Ec'lGAJOU;;	<s   CCc                     g }d}|D ]  }|j                  ||z
         |} t        t        |            t        |d      z   S Nr      r   r   r   r   )r   r   deltasbaser   s        r   rb   zPositions.encode   sM     	CMM#*%D	 V%fa(888r   c                     |j                  t        d            s|t        d      z  }t        |t        d        }d}g }|D ]  }||z  }|j	                  |        |S N.r   endswithr	   r   r   r   )r   r?   codespositionr   codes         r   decode_positionszPositions.decode_positions   sl    ##AcF+1S6!Kk)*-.	 	'DHX&	' r   c                 ,    t        |d t               d   S r+   )r   r   rf   s     r   rg   zPositions.decode_frequency   s    ;z	23A66r   c                 >    | j                  |      | j                  z  S r    )rg   r   rf   s     r   ri   zPositions.decode_weight   s    $$[1D4D4DDDr   c                 N    | j                  |      D cg c]  }|df c}S c c}w re   )r   )r   r?   r   s      r   decode_position_boostsz Positions.decode_position_boosts   s%    $($9$9+$FGSaGGGs   "c                     t               }|D ]"  }|j                  | j                  |             $ | j                  t	        |            S r    )r_   updater   rb   sorted)r   rl   sr   s       r   rm   zPositions.combine  sB    E 	/AHHT**1-.	/{{6!9%%r   N)r'   rC   rD   rE   r3   rb   r   rg   ri   r   rm   rJ   r   r   r   r      s,    <9	7EH&r   r   c                   .    e Zd ZdZd Zd Zd Zd Zd Zy)
CharacterszStores token position and character start and end information for each
    posting.

    Supports: frequency, weight, positions, position_boosts (always reports
    position boost = 1.0), characters.
    c              +     K   | j                   }t        t              }t        t              }d|d<   d|d<   d|d<   t	        |||      D ]b  }||j
                     j                  |j                  |j                  |j                  f       ||j
                  xx   |j                  z  cc<   d t        |      D ]-  \  }}	| j                  |	      }|t        |	      ||   |z  |f / y wNTr   charsrs   )r   r   rN   rH   rP   rV   r   r   	startcharendcharrx   r   rb   r   )
r   r0   r1   r2   r^   seenrv   rX   r]   r   s
             r   r3   zCharacters.word_values  s     4 e$"{wxx0 	'ALQYY ?@AFFOqww&O	' $D/ 	<JAwKK(Ec'lGAJOU;;	<s   C(C*c                     g }d}d}|D ]'  \  }}}|j                  ||z
  ||z
  ||z
  f       |}|}) t        t        |            t        |d      z   S r   r   )r   r   r   posbasecharbaser   r   r   s           r   rb   zCharacters.encode"  st    '. 	#CGMM3=)h*>"Y.0 1GH		
 V%fa(888r   c                     |j                  t        d            s|t        d      z  }t        |t        d        }d}d}g }|D ].  }|d   |z   }|d   |z   }|d   |z   }|j	                  |||f       0 |S )Nr   r   r\   r   r   )r   r?   r   r   r   posns_charsr   r   s           r   decode_characterszCharacters.decode_characters-  s    ##AcF+1S6!Kk)*-. 	?DAw)HQ')I1g	)G)W=>		?
 r   c                     |j                  t        d            s|t        d      z  }t        |t        d        }d}g }|D ]  }|d   |z   }|j	                  |        |S r   r   r   r?   r   r   posnsr   s         r   r   zCharacters.decode_positions;  sm    ##AcF+1S6!Kk)*-. 	#DAw)HLL"	# r   c                 <   i }|D ]K  }| j                  |      D ]5  \  }}}||v r$||   \  }}t        ||      t        ||      f||<   /||f||<   7 M t        |j	                               D cg c]  }|||   d   ||   d   f }	}| j                  |	      S c c}w )Nr   r\   )r   minmaxr   keysrb   )
r   rl   r   r   r   scecold_scold_ecr   s
             r   rm   zCharacters.combineF  s     	&A#55a8 &R!8%(VNFF!"fos2v?AcF "XAcF&	& 9?qvvx8HI#qvay!C&),II{{5!! Js   .BN)	r'   rC   rD   rE   r3   rb   r   r   rm   rJ   r   r   r   r   
  s     < 	9	
"r   r   c                   4    e Zd ZdZd Zd Zd Zd Zd Zd Z	y)	PositionBoostszA format that stores positions and per-position boost information
    in each posting.

    Supports: frequency, weight, positions, position_boosts.
    c              +   z  K   | j                   }t        t              }d|d<   d|d<   t        |||      D ]:  }|j                  }|j
                  }||j                     j                  ||f       < t        |      D ]:  \  }	}
| j                  |
      }|	t        |
      t        d |
D              |z  |f < y w)NTr   rs   c              3   &   K   | ]	  }|d      ywr[   rJ   )rW   ps     r   rY   z-PositionBoosts.word_values.<locals>.<genexpr>g  s     %:qad%:s   )r   r   rN   rP   r   rx   rV   r   r   rb   r   r   )r   r0   r1   r2   r^   r   rX   r   rx   r]   r   s              r   r3   zPositionBoosts.word_valuesZ  s     4 "{xx0 	.A%%CGGELe-	.
 "$ 	HHAuKK&Ec%j#%:E%:":R"?GG	Hs   B9B;c                     g }d}d}|D ]"  \  }}||z  }|j                  ||z
  |f       |}$ t        t        |            t        |      z   t	        |d      z   S r   )r   r   r   r   r   )r   r   r   r   summedboostr   rx   s          r   rb   zPositionBoosts.encodei  ss     	JC5 KLL#*e,-D	 #e*%
;(??q/" 	#r   c                     |j                  t        d            s|t        d      z  }t        |t        t        z   d        }d}g }|D ]   }|d   |z   }|j                  ||d   f       " |S )Nr   r   r\   r   r	   r   r   r   r   )r   r?   r   r   posns_boostsr   s         r   r   z%PositionBoosts.decode_position_boostst  s    ##AcF+1S6!Kk)k"9":;< 	5DAw)H47 34	5 r   c                     |j                  t        d            s|t        d      z  }t        |t        t        z   d        }d}g }|D ]  }|d   |z   }|j                  |        |S r   r   r   s         r   r   zPositionBoosts.decode_positions  ss    ##AcF+1S6!Kk)k"9":;< 	#DAw)HLL"	# r   c                 `    t        |t        t        t        z          d   }|| j                  z  S r+   )r   r   r   r   )r   r   r   s      r   ri   zPositionBoosts.decode_weight  s.    "1Yy;/F#GHKT----r   c                     t        t              }|D ](  }| j                  |      D ]  \  }}||xx   |z  cc<    * | j                  t	        |j                                     S r    )r   rH   r   rb   r   items)r   rl   r   r   r   rx   s         r   rm   zPositionBoosts.combine  sb     	 A"99!<  
U#% 	  {{6!''),--r   N)
r'   rC   rD   rE   r3   rb   r   r   ri   rm   rJ   r   r   r   r   S  s&    H	#		..r   r   c                   :    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
y	)
CharacterBoostszA format that stores positions, character start and end, and
    per-position boost information in each posting.

    Supports: frequency, weight, positions, position_boosts, characters,
    character_boosts.
    c              +   p  K   t        t              }d|d<   d|d<   d|d<   t        |||      D ]L  }||j                     j	                  |j
                  |j                  |j                  |j                  f       N t        |      D ]*  \  }}| j                  |      \  }}|t        |      ||f , y wr   )r   rN   rP   rV   r   r   r   r   rx   r   rb   r   )	r   r0   r1   r2   r   rX   r]   r   r   s	            r   r3   zCharacterBoosts.word_values  s     4 "{wxx0 	JALQYY HI	J "$ 	6HAu!%U!3E;c%j+u55	6s   B4B6c                     | j                   }g }d}d}d}|D ].  \  }}}	}
|j                  ||z
  ||z
  |	|z
  |
f       |}|	}||
z  }0 t        t        |            t	        ||z        z   t        |d      z   |fS r   )r   r   r   r   r   r   )r   r   r^   r   r   r   r   r   r   r   rx   s              r   rb   zCharacterBoosts.encode  s    .3 	!*CGULL#-X)=!I-u6 7GH5 K	! 3u:&K"4D)EE?#%02 	2r   c                    |j                  t        d            s|t        d      z  }t        |t        t        z   d        }d}d}g }|D ]2  }||d   z   }||d   z   }||d   z   }|j                  ||||d   f       4 |S )Nr   r   r\   r      r   )r   r?   r   r   r   posn_char_boostsr   r   s           r   decode_character_boostsz'CharacterBoosts.decode_character_boosts  s    ##AcF+1S6!Kk)k"9":;< 	MD$q')H$q')I$q')G##Xy'47$KL		M
  r   c                 P    | j                  |      D cg c]  }|d   	 c}S c c}w r+   r   )r   r?   items      r   r   z CharacterBoosts.decode_positions  s%    $($@$@$MNDQNNNs   #c           
      l    | j                  |      D cg c]  \  }}}}|||f c}}}}S c c}}}}w r    r   )r   r?   r   r   r   _s         r   r   z!CharacterBoosts.decode_characters  s>    //<> >.Hc9gqi) > 	> >s   .
c           	      d    | j                  |      D cg c]  \  }}}}||f c}}}S c c}}}w r    r   )r   r?   r   r   rx   s        r   r   z&CharacterBoosts.decode_position_boosts  s;    //<> >!1aEe > 	> >s   +c           
      Z   i }|D ]R  }| j                  |      D ]<  \  }}}}||v r)||   \  }}	}
t        ||      t        ||	      |
|z   f||<   5|||f||<   > T t        |j	                               D cg c]  \  }\  }}}||||f }}}}}| j                  |      d   S c c}}}}w r+   )r   r   r   r   r   rb   )r   rl   r   r   r   r   r   rx   r   r   	old_boostr   s               r   rm   zCharacterBoosts.combine  s     	-A&*&B&B1&E -"RU!803A-FFI!"fos2v'%/1AcF !"e_AcF-	- 1779%' '*>#B#r2u% ' '{{5!!$$'s   8B%
N)r'   rC   rD   rE   r3   rb   r   r   r   r   rm   rJ   r   r   r   r     s+    62" O>>%r   r   N)rE   collectionsr   whoosh.analysisr   r   whoosh.compatr   r   r   r	   whoosh.systemr   r   r   r   r   r   r   objectr   rP   rR   ro   r   r   r   r   rJ   r   r   <module>r      s   8 $ . 4 4 $ 0 J J
C1V C1X B,@ ,@^6& 6&rF" F"R@.Y @.FK%j K%r   