
    =wg}                     z   d dl mZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d dlmZ  G d de      Zed        Zd Zd Z d Z!d Z" G d de      Z# G d de$      Z% G d de%      Z& G d dejN                  e%      Z(d dZ) G d de%      Z*e*Z+y)!    )with_statementN)bisect_right)contextmanager)columns)abstractmethod
bytes_type)SortingPoolUnknownFieldError)	LockError)
emptybytes)fibrandom_name)try_for)
utf8encodec                       e Zd Zy)IndexingErrorN)__name__
__module____qualname__     E/var/www/horilla/myenv/lib/python3.12/site-packages/whoosh/writing.pyr   r   .   s    r   r   c              #   T   K   | j                          d  | j                          y wN)start_group	end_group)writers    r   groupmanagerr   4   s!     
	
s   &(c                     |S )z6This policy does not merge any existing segments.
    r   r   segmentss     r   NO_MERGEr#   B   s	     Or   c                    ddl m} g }g }t        |d       }d}d}t        |      D ]`  \  }}	|	j	                         }
|
dkD  r||
z  }|r|j                  |	       4|j                  |	|f       |dkD  sM|t        |dz         k  s_d}b |rYt        |      d	kD  rK|D ]D  \  }	} || j                  | j                  |	      }| j                  |       |j                          F |S |S )
zwThis policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence.
    r   SegmentReaderc                 "    | j                         S r   doc_count_all)ss    r   <lambda>zMERGE_SMALL.<locals>.<lambda>R   s    9J r   )keyF      T   )whoosh.readingr&   sorted	enumerater)   appendr   lenstorageschema
add_readerclose)r   r"   r&   unchanged_segmentssegments_to_mergesorted_segment_list
total_docsmerge_point_foundisegcountreaders               r   MERGE_SMALLrB   H   s   
 - /JKJ/0 
)3!!#19%J%%c*$$c1X.1uc!a%j0$(!
) S!23a7' 	FC"6>>6==#FFf%LLN	 "!r   c                     ddl m} |D ]A  } || j                  | j                  |      }| j	                  |       |j                          C g S )z.This policy merges all existing segments.
    r   r%   )r0   r&   r5   r6   r7   r8   )r   r"   r&   r?   rA   s        r   OPTIMIZErD   l   sJ     - v~~v}}cB&! Ir   c                     g S )zSThis policy DELETES all existing segments and only writes the new
    segment.
    r   r!   s     r   CLEARrF   y   s	    
 Ir   c                   <    e Zd ZdZd
dZd Zd Zd Zd Zd Z	d Z
y	)PostingPool$abcdefghijklmnopqrstuvwxyz0123456789c                     t        j                  | fi | || _        || _        |dz  dz  | _        d| _        t               | _        y )Ni   r   )r	   __init__	tempstoresegmentlimitcurrentsizeset
fieldnames)selfrL   rM   limitmbkwargss        r   rK   zPostingPool.__init__   sD    T,V,"t^d*
%r   c                 v    dt               z  }| j                  j                  |      j                         }||fS )Nz%s.run)r   rL   create_fileraw_file)rR   pathfs      r   _new_runzPostingPool._new_run   s4    +-'NN&&t,557Qwr   c                 T    | j                   j                  |      j                         S r   )rL   	open_filerW   rR   rX   s     r   	_open_runzPostingPool._open_run   s     ~~''-6688r   c                 8    | j                   j                  |      S r   )rL   delete_filer]   s     r   _remove_runzPostingPool._remove_run   s    ~~))$//r   c                     t        |d   t              sJ d|d   z         |d    t        |d   t              sJ d|d   z         | j                  j                  |d          dt	        |d         z   dz   t	        |d         dz  z   d	z   d
z   dz   t	        |d   xs d      z   }| xj
                  |z  c_        | j
                  | j                  kD  r| j                          | j                  j                  |       y )Nr/   z	tbytes=%r   z	vbytes=%rr   E                   )

isinstancer   rQ   addr4   rO   rN   savecurrentr3   )rR   itemsizes      r   rl   zPostingPool.add   s   $q':.Ed1g0EE.7d1gz2IK$q'4II2DG$47|$47|a'(  	
 
 47=b)* 	D djj(IIKD!r   c                 "    | j                         S r   )itemsrR   s    r   iter_postingszPostingPool.iter_postings   s     zz|r   c                 <    t        j                  |        d| _        y Nr   )r	   rm   rO   rs   s    r   rm   zPostingPool.save   s    r   N)   )r   r   r   	namecharsrK   rZ   r^   ra   rl   rt   rm   r   r   r   rH   rH      s,     7I 
90""
r   rH   c                       e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
ed	        Zd
 ZddZddZedd       Zed        Zed        ZddZddZd Zd Zd Zd Zy)IndexWriteraw  High-level object for writing to an index.

    To get a writer for a particular index, call
    :meth:`~whoosh.index.Index.writer` on the Index object.

    >>> writer = myindex.writer()

    You can use this object as a context manager. If an exception is thrown
    from within the context it calls :meth:`~IndexWriter.cancel` to clean up
    temporary files, otherwise it calls :meth:`~IndexWriter.commit` when the
    context exits.

    >>> with myindex.writer() as w:
    ...     w.add_document(title="First document", content="Hello there.")
    ...     w.add_document(title="Second document", content="This is easy!")
    c                     | S r   r   rs   s    r   	__enter__zIndexWriter.__enter__   s    r   c                 J    |r| j                          y | j                          y r   )cancelcommitrR   exc_typeexc_valexc_tbs       r   __exit__zIndexWriter.__exit__   s    KKMKKMr   c                     t        |       S )a  Returns a context manager that calls
        :meth:`~IndexWriter.start_group` and :meth:`~IndexWriter.end_group` for
        you, allowing you to use a ``with`` statement to group hierarchical
        documents::

            with myindex.writer() as w:
                with w.group():
                    w.add_document(kind="class", name="Accumulator")
                    w.add_document(kind="method", name="add")
                    w.add_document(kind="method", name="get_result")
                    w.add_document(kind="method", name="close")

                with w.group():
                    w.add_document(kind="class", name="Calculator")
                    w.add_document(kind="method", name="add")
                    w.add_document(kind="method", name="multiply")
                    w.add_document(kind="method", name="get_result")
                    w.add_document(kind="method", name="close")
        )r   rs   s    r   groupzIndexWriter.group   s    * D!!r   c                      y)a  Start indexing a group of hierarchical documents. The backend should
        ensure that these documents are all added to the same segment::

            with myindex.writer() as w:
                w.start_group()
                w.add_document(kind="class", name="Accumulator")
                w.add_document(kind="method", name="add")
                w.add_document(kind="method", name="get_result")
                w.add_document(kind="method", name="close")
                w.end_group()

                w.start_group()
                w.add_document(kind="class", name="Calculator")
                w.add_document(kind="method", name="add")
                w.add_document(kind="method", name="multiply")
                w.add_document(kind="method", name="get_result")
                w.add_document(kind="method", name="close")
                w.end_group()

        A more convenient way to group documents is to use the
        :meth:`~IndexWriter.group` method and the ``with`` statement.
        Nr   rs   s    r   r   zIndexWriter.start_group   s    0 	r   c                      y)ziFinish indexing a group of hierarchical documents. See
        :meth:`~IndexWriter.start_group`.
        Nr   rs   s    r   r   zIndexWriter.end_group  s    
 	r   c                 @     | j                   j                  ||fi | y)zAdds a field to the index's schema.

        :param fieldname: the name of the field to add.
        :param fieldtype: an instantiated :class:`whoosh.fields.FieldType`
            object.
        N)r6   rl   )rR   	fieldname	fieldtyperT   s       r   	add_fieldzIndexWriter.add_field  s     		977r   c                 >     | j                   j                  |fi | y)a  Removes the named field from the index's schema. Depending on the
        backend implementation, this may or may not actually remove existing
        data for the field from the index. Optimizing the index should always
        clear out existing data for a removed field.
        N)r6   remove)rR   r   rT   s      r   remove_fieldzIndexWriter.remove_field  s     	9//r   c                     t         )z1Returns a reader for the existing index.
        NotImplementedError)rR   rT   s     r   rA   zIndexWriter.reader  s
    
 "!r   c                 <    ddl m}  || j                         fi |S )Nr   Searcher)whoosh.searchingr   rA   rR   rT   r   s      r   searcherzIndexWriter.searcher&  s    -000r   Nc                 F    ddl m}  |||      }| j                  ||      S )zDeletes any documents containing "term" in the "fieldname" field.
        This is useful when you have an indexed field containing a unique ID
        (such as "pathname") for each document.

        :returns: the number of documents deleted.
        r   )Term)r   )whoosh.queryr   delete_by_query)rR   r   textr   r   qs         r   delete_by_termzIndexWriter.delete_by_term+  s)     	&D!##A#99r   c                     |r|}n| j                         }	 d}|j                  |d      D ]  }| j                  |       |dz  } 	 |s|j                          |S # |s|j                          w w xY w)zkDeletes any documents matching a query object.

        :returns: the number of documents deleted.
        r   T)for_deletionr/   )r   docs_for_querydelete_documentr8   )rR   r   r   r*   r@   docnums         r   r   zIndexWriter.delete_by_query8  s     AA	E**14*@ $$V,
 	 	 s   0A A1c                     t         )z&Deletes a document by number.
        r   )rR   r   deletes      r   r   zIndexWriter.delete_documentN  s
     "!r   c                     t         )a^
  The keyword arguments map field names to the values to index/store::

            w = myindex.writer()
            w.add_document(path=u"/a", title=u"First doc", text=u"Hello")
            w.commit()

        Depending on the field type, some fields may take objects other than
        unicode strings. For example, NUMERIC fields take numbers, and DATETIME
        fields take ``datetime.datetime`` objects::

            from datetime import datetime, timedelta
            from whoosh import index
            from whoosh.fields import *

            schema = Schema(date=DATETIME, size=NUMERIC(float), content=TEXT)
            myindex = index.create_in("indexdir", schema)

            w = myindex.writer()
            w.add_document(date=datetime.now(), size=5.5, content=u"Hello")
            w.commit()

        Instead of a single object (i.e., unicode string, number, or datetime),
        you can supply a list or tuple of objects. For unicode strings, this
        bypasses the field's analyzer. For numbers and dates, this lets you add
        multiple values for the given field::

            date1 = datetime.now()
            date2 = datetime(2005, 12, 25)
            date3 = datetime(1999, 1, 1)
            w.add_document(date=[date1, date2, date3], size=[9.5, 10],
                           content=[u"alfa", u"bravo", u"charlie"])

        For fields that are both indexed and stored, you can specify an
        alternate value to store using a keyword argument in the form
        "_stored_<fieldname>". For example, if you have a field named "title"
        and you want to index the text "a b c" but store the text "e f g", use
        keyword arguments like this::

            writer.add_document(title=u"a b c", _stored_title=u"e f g")

        You can boost the weight of all terms in a certain field by specifying
        a ``_<fieldname>_boost`` keyword argument. For example, if you have a
        field named "content", you can double the weight of this document for
        searches in the "content" field like this::

            writer.add_document(content="a b c", _title_boost=2.0)

        You can boost every field at once using the ``_boost`` keyword. For
        example, to boost fields "a" and "b" by 2.0, and field "c" by 3.0::

            writer.add_document(a="alfa", b="bravo", c="charlie",
                                _boost=2.0, _c_boost=3.0)

        Note that some scoring algroithms, including Whoosh's default BM25F,
        do not work with term weights less than 1, so you should generally not
        use a boost factor less than 1.

        See also :meth:`Writer.update_document`.
        r   rR   fieldss     r   add_documentzIndexWriter.add_documentT  s    | "!r   c                     t         r   r   rR   rA   s     r   r7   zIndexWriter.add_reader  s    !!r   c                 *    d|v rt        |d         S |S )N_boostfloat)rR   r   defaults      r   
_doc_boostzIndexWriter._doc_boost  s    v)**Nr   c                 4    d|z  }||v rt        ||         S |S )Nz	_%s_boostr   )rR   r   r   r   boostkws        r   _field_boostzIndexWriter._field_boost  s(    	)f))Nr   c                     | j                   j                         D cg c]  \  }}||v r|j                  r| }}}|S c c}}w r   )r6   rr   unique)rR   r   namefieldunique_fieldss        r   _unique_fieldszIndexWriter._unique_fields  sG    151B1B1D =+$ FNu||  = ==s   ?c                    | j                  |      }|rW| j                         5 }|D cg c]	  }|||   f }}|j                  |      }|D ]  }| j                  |        	 ddd        | j                  di | yc c}w # 1 sw Y   !xY w)a  The keyword arguments map field names to the values to index/store.

        This method adds a new document to the index, and automatically deletes
        any documents with the same values in any fields marked "unique" in the
        schema::

            schema = fields.Schema(path=fields.ID(unique=True, stored=True),
                                   content=fields.TEXT)
            myindex = index.create_in("index", schema)

            w = myindex.writer()
            w.add_document(path=u"/", content=u"Mary had a lamb")
            w.commit()

            w = myindex.writer()
            w.update_document(path=u"/", content=u"Mary had a little lamb")
            w.commit()

            assert myindex.doc_count() == 1

        It is safe to use ``update_document`` in place of ``add_document``; if
        there is no existing document to replace, it simply does an add.

        You cannot currently pass a list or tuple of values to a "unique"
        field.

        Because this method has to search for documents with the same unique
        fields and delete them before adding the new document, it is slower
        than using ``add_document``.

        * Marking more fields "unique" in the schema will make each
          ``update_document`` call slightly slower.

        * When you are updating multiple documents, it is faster to batch
          delete all changed documents and then use ``add_document`` to add
          the replacements instead of using ``update_document``.

        Note that this method will only replace a *committed* document;
        currently it cannot replace documents you've added to the IndexWriter
        but haven't yet committed. For example, if you do this:

        >>> writer.update_document(unique_id=u"1", content=u"Replace me")
        >>> writer.update_document(unique_id=u"1", content=u"Replacement")

        ...this will add two documents with the same value of ``unique_id``,
        instead of the second document replacing the first.

        See :meth:`Writer.add_document` for information on
        ``_stored_<fieldname>``, ``_<fieldname>_boost``, and ``_boost`` keyword
        arguments.
        Nr   )r   r   _find_uniquer   r   )rR   r   r   r*   r   uniquetermsdocsr   s           r   update_documentzIndexWriter.update_document  s    l ++F3 1A@MNfTl3NN~~k2" 1F((011 	#F# O1 1s   BA>+B>BBc                      y)z0Finishes writing and unlocks the index.
        Nr   rs   s    r   r   zIndexWriter.commit  s     	r   c                      y)z\Cancels any documents/deletions added by this object
        and unlocks the index.
        Nr   rs   s    r   r~   zIndexWriter.cancel  s     	r   r   T)g      ?)r   r   r   __doc__r|   r   r   r   r   r   r   r   rA   r   r   r   r   r   r7   r   r   r   r   r   r~   r   r   r   rz   rz      s    "".480 " "1
:, " "
 =" ="~ " "?$B
r   rz   c                        e Zd Z	 	 d$dZd Zd Zd Zd Zd Zd Z	d Z
 fd	Z fd
Zd Zd%dZd Zd Zd&dZd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d Z!d  Z"d! Z#d'd"Z$d# Z% xZ&S )(SegmentWriterc
                    d | _         |r>|j                  d      | _         t        | j                   j                  ||      st        |ddlm}  |       }|| _        |j                  | _        |j                  | _	        |j                         }|j                  dz   | _        |j                  | _        |j                  | _        |x| _        | _        | j!                          | j                  j#                  d| j                  z        | _        |j'                  | j                  | j                        }|| _        |	xr |j+                         | _        d| _        d| _        t3        | j$                  | j(                  |      | _        |j7                  | j                  |      | _        |j;                  | j                  |      | _        d	| _        d| _         d | _!        y )
N	WRITELOCK)timeoutdelayr   )default_codecr/   z%s.tmpF)rS   T)"	writelocklockr   acquirer   whoosh.codecr   codecr5   	indexname	_read_toc
generationr6   r"   r   docbase_setup_doc_offsetstemp_storage_tempstoragenew_segment
newsegmentshould_assemblecompound	is_closed_addedrH   poolper_document_writerperdocwriterfield_writerfieldwritermergeoptimize	mergetype)rR   ix	poolclassr   r   _lkrS   r   r   r   rT   r   infor   s                 r   rK   zSegmentWriter.__init__  s    WW[1DN4>>117!&(=2!OE
 zz||~//A-kk%,,dl! !LL55h6OP&&t||T^^D
$ AZ%?%?%A 1 14??(/1	 "55dllJO --dllJG
r   c                 P    d| j                   j                  d| j                  dS )N< >)	__class__r   r   rs   s    r   __repr__zSegmentWriter.__repr__&  s     NN33T__EEr   c                 2    | j                   rt        d      y )NzThis writer is closed)r   r   rs   s    r   _check_statezSegmentWriter._check_state)  s    >> 788 r   c                     g | _         d}| j                  D ]0  }| j                   j                  |       ||j                         z  }2 y rv   )_doc_offsetsr"   r3   r)   )rR   baser*   s      r   r   z SegmentWriter._setup_doc_offsets-  sG     	&A$$T*AOO%%D	&r   c                 V    | j                   }t        |      dk(  ryt        ||      dz
  S )Nr/   r   )r   r4   r   )rR   r   offsetss      r   _document_segmentzSegmentWriter._document_segment4  s0     ##w<1GV,q00r   c                 n    | j                  |      }| j                  |   }| j                  |   }|||z
  fS r   )r   r   r"   )rR   r   
segmentnumoffsetrM   s        r   _segment_and_docnumz!SegmentWriter._segment_and_docnum<  sA     ++F3
"":.--
+''r   c              #   t   K   | j                   }|D ]#  \  }}}}}	||vr|||   }
n||z   }
|||
||	f % y wr   r6   )rR   rr   startdocdocmapr6   r   r   r   weightvbytesnewdocs              r   _process_postszSegmentWriter._process_postsE  s^     7< 	<3ItVVV&!!F*dFFF;;	<s   68c                     | j                   S r   )r   rs   s    r   r   zSegmentWriter.temp_storageQ  s       r   c                 ~    | j                          | j                  rt        d      t        t        |   ||fi | y Nz/Can't modify schema after adding data to writer)r   r   	Exceptionsuperr   r   )rR   r   	fieldspecrT   r   s       r   r   zSegmentWriter.add_fieldT  s8    ;;MNNmT,Y	LVLr   c                 z    | j                          | j                  rt        d      t        t        |   |       y r  )r   r   r  r  r   r   )rR   r   r   s     r   r   zSegmentWriter.remove_fieldZ  s1    ;;MNNmT/	:r   c                 :    t        d | j                  D              S )z
        Returns True if the current index has documents that are marked deleted
        but haven't been optimized out of the index yet.
        c              3   <   K   | ]  }|j                           y wr   )has_deletions.0r*   s     r   	<genexpr>z.SegmentWriter.has_deletions.<locals>.<genexpr>f       <1??$<   )anyr"   rs   s    r   r  zSegmentWriter.has_deletions`  s     <dmm<<<r   c                     | j                          |t        d | j                  D              k\  rt        d|z        | j	                  |      \  }}|j                  ||       y )Nc              3   <   K   | ]  }|j                           y wr   r(   )r  r?   s     r   r  z0SegmentWriter.delete_document.<locals>.<genexpr>j  s     F**,Fr  zNo document ID %r in this indexr   )r   sumr"   r   r   r   )rR   r   r   rM   	segdocnums        r   r   zSegmentWriter.delete_documenth  s^    SFFFF AF JKK!55f=	&9r   c                 :    t        d | j                  D              S )zO
        :returns: the total number of deleted documents in the index.
        c              3   <   K   | ]  }|j                           y wr   )deleted_countr  s     r   r  z.SegmentWriter.deleted_count.<locals>.<genexpr>t  r  r  )r  r"   rs   s    r   r  zSegmentWriter.deleted_counto  s    
 <dmm<<<r   c                 L    | j                  |      \  }}|j                  |      S r   )r   
is_deleted)rR   r   rM   r  s       r   r  zSegmentWriter.is_deletedv  s(    !55f=!!),,r   c                     ddl m} | j                          |j                  | j                  | j
                  | j                  | j                  |      S )Nr   )	FileIndex)reuse)whoosh.indexr  r   _readerr5   r6   r"   r   )rR   r   r  s      r   rA   zSegmentWriter.readerz  sD    *  t{{DMM!% ! ? 	?r   c                 6    | j                   j                         S r   )r   rt   rs   s    r   rt   zSegmentWriter.iter_postings  s    yy&&((r   c                     | j                  |j                         ||      }| j                  j                  }|D ]
  } ||        y r   )r  rt   r   rl   )rR   rA   r   r   rr   add_postro   s          r   add_postings_to_poolz"SegmentWriter.add_postings_to_pool  sB    ##F$8$8$:HfM99== 	DTN	r   c                 x    | j                  |||      }| j                  j                  | j                  ||       y r   )r  r   add_postingsr6   )rR   lengthsrr   r   r   s        r   write_postingszSegmentWriter.write_postings  s2    ##E8V<%%dkk7EBr   c           	         | j                   }|j                         ri }nd }| j                  }i }|D ]i  }||   }|j                  }	|	s|j	                  |      s)|j                  ||	      }
t        |
t        j                        r|
j                         }
|
||<   k |j                         D ]  \  }}|| j                  ||<   |j                  | j                         |D ]  }||   }|j                  ||      }|j                  |||j                  |      |       |j                   rB|j#                  ||      r0|j!                  |||j                         }|j%                  |||       ||v s||   |   }|j'                  ||j                  |        |j)                          | xj                  dz  c_         |S )Nr/   )r6   r  r   column_type
has_columncolumn_readerrk   r   TranslatingColumnReader
raw_column	iter_docsr   	start_docdoc_field_lengthr   getvector
has_vectoradd_vector_matcheradd_column_value
finish_doc)rR   rQ   rA   r6   r   pdwcolsr   fieldobjcoltypecreaderr   storedlengthvcvs                   r   write_per_doczSegmentWriter.write_per_doc  s    !FF# 	*Ii(H**G6,,Y7 ..y'Bgw'F'FG%002G")Y	* %..0 	NFF!!%vMM$++&' N	!),00Ci$jj3V= ??v'8'8'KfiIA**9hB$i0B((H4H4H"MN NNKK1K)	, r   c                 &     j                           j                  }t         fd|j                         D              }t         j                  j                               |z  } j                  ||      } j                  |||       d _        y )Nc              3   >   K   | ]  }|j                   v r|  y wr   r   )r  fnamerR   s     r   r  z+SegmentWriter.add_reader.<locals>.<genexpr>  s$      04;;.  0s   T)	r   r   rP   indexed_field_namesr6   namesrC  r&  r   )rR   rA   basedocndxnamesrQ   r   s   `     r   r7   zSegmentWriter.add_reader  s~    ++ 0&*D*D*F 0 0**,-8
##J7!!&'6:r   c                 <    |D ]  }||vst        d|d|       y )NzNo field named z in r
   )rR   r6   rQ   r   s       r   _check_fieldszSegmentWriter._check_fields  s0     	:D6!'+/)9 : :	:r   c           	         | j                          | j                  }| j                  }| j                  }| j                  j
                  }| j                  |      }t        |j                         D cg c]  }|j                  d      s| c}      }| j                  ||       |j                  |       |D ]  }	|j                  |	      }
|
||	   }d}|j                  rU| j                  ||	|      }|j                  |
      }|j                   }|D ]   \  }}}}||z  }|r||z  } ||	||||f       " |j#                         rB|j%                  |	      }|j'                  |
      D ]  }t)        |      d   } |||ddf        |j*                  }|rE|j,                  }|j/                  |
|d      }t        d |D              }|j1                  |	||       |j                  d|	z  |
      }|j2                  r|nd }|j5                  |	|||       |j6                  }|sn|r|j9                  |      }|j;                  |	||        |j=                          d| _        | xj                  dz  c_        y c c}w )	N_r   r/   index)modec              3   0   K   | ]  \  }}}}|||f  y wr   r   )r  r   rN  r   r  s        r   r  z-SegmentWriter.add_document.<locals>.<genexpr>  s)       G$;D!VV "&vv 6  Gs   z
_stored_%sT) r   r   r6   r   r   rl   r   r1   keys
startswithrL  r2  r4  indexedr   rO  scorableseparate_spellingspelling_fieldnamespellable_wordsr   r5  analyzerword_valuesadd_vector_itemsr?  r   r,  to_column_valuer8  r9  r   )rR   r   r   r6   r   r%  docboostr   rQ   r   valuer   r@  
fieldboostrr   rU  tbytesfreqr   r  
spellfieldwordvformatrY  vitems	customvalsvcolumnrB  s                                r   r   zSegmentWriter.add_document  s   ((99==??6*fkkm :d$(OOC$8 " : ;
6:.v&# 3	EIJJy)E}9%EF}} "..vy(K
 E* >>49 J0FD&&j(F$iHI	J &&("55i@
!11%8 ?D%d+A.Dj$1f=>?
 llG >> ,,UH7,K  G?E G G--iG 

<)#;UCI $llB""9eR@&&F)/**95--iDg3	Ej 	!qy:s   3I?c                 4    | j                   | j                  z
  S r   )r   r   rs   s    r   	doc_countzSegmentWriter.doc_count  s    {{T\\))r   c                 T    | j                   }|j                  | j                         |S r   )r   set_doc_countr   rR   r   s     r   get_segmentzSegmentWriter.get_segment  s#    __
  -r   c                     | j                   j                  st        d      | j                  j	                  | j
                  | j                               S )NzPer-doc writer is still open)r   r   r  r   per_document_readerr5   rn  rs   s    r   rp  z!SegmentWriter.per_document_reader  sA      **:;;zz--dllD<L<L<NOOr   c                     ||n| j                   }||n| j                  }||n| j                  }|rn|rt        }n|st        }nt
        } || | j                        S r   )r   r   r   rD   r#   rB   r"   )rR   r   r   r   s       r   _merge_segmentszSegmentWriter._merge_segments&  sb     "+!6IDNN	'38*

 I I#I t}}--r   c                 f   | j                   j                          | j                  j                  r| j	                         }nd }| j
                  j                         }| j                  j                  | j                  ||       | j                  j                          |r|j                          y y r   )
r   r8   r   length_statsrp  r   rt   r   r(  r6   )rR   pdrpostingss      r   _flush_segmentzSegmentWriter._flush_segment=  s    !::""**,CC99**,%%dkk3A IIK r   c                     | j                   j                  s| j                   j                          | j                  j                  s| j                  j                          | j                  j                          y r   )r   r   r8   r   r   cleanuprs   s    r   _close_segmentzSegmentWriter._close_segmentI  sT      **##%))""$		r   c                     | j                   r3| j                         }|j                  | j                         d|_         y y NT)r   rn  create_compound_filer5   rm  s     r   _assemble_segmentzSegmentWriter._assemble_segmentP  s6    ==))+J++DLL9"&J	 r   c                     | j                          | j                  j                          | j                  j                          | j	                         S r   )r   r   r8   r   rn  rs   s    r   _partial_segmentzSegmentWriter._partial_segmentW  sC     	! !!r   c                     | j                          | j                          | j                          | j                         S r   )rw  rz  r~  rn  rs   s    r   _finalize_segmentzSegmentWriter._finalize_segment`  s5     !!r   c                     ddl m}m}  || j                  || j                        }|j                  | j                  | j                          || j                  | j                  | j                  |       y )Nr   )TOCclean_files)r!  r  r  r6   r   writer5   r   )rR   r"   r  r  tocs        r   _commit_toczSegmentWriter._commit_tocj  sM    1 $++x9		$,,/DLL$..$//8Lr   c                     | j                   j                          | j                  r| j                  j                          d| _        y r|  )r   destroyr   releaser   rs   s    r   _finishzSegmentWriter._finishs  s2    !!#>>NN""$r   c                    | j                          | j                  |||      }| j                  r |j                  | j	                                n| j                          | j                  |       | j                          y)aR  Finishes writing and saves all additions and changes to disk.

        There are four possible ways to use this method::

            # Merge small segments but leave large segments, trying to
            # balance fast commits with fast searching:
            writer.commit()

            # Merge all segments into a single segment:
            writer.commit(optimize=True)

            # Don't merge any existing segments:
            writer.commit(merge=False)

            # Use a custom merge function
            writer.commit(mergetype=my_merge_function)

        :param mergetype: a custom merge function taking a Writer object and
            segment list as arguments, and returning a new segment list. If you
            supply a ``mergetype`` function, the values of the ``optimize`` and
            ``merge`` arguments are ignored.
        :param optimize: if True, all existing segments are merged with the
            documents you've added to this writer (and the value of the
            ``merge`` argument is ignored).
        :param merge: if False, do not merge small segments.
        N)r   rr  r   r3   r  rz  r  r  )rR   r   r   r   finalsegmentss        r   r   zSegmentWriter.commit|  sk    8 	,,Y%H;;   !7!7!9: !' 	r   c                 d    | j                          | j                          | j                          y r   )r   rz  r  rs   s    r   r~   zSegmentWriter.cancel  s#    r   )Ng        g?Trw   r   NTr   r   )NNN)'r   r   r   rK   r   r   r   r   r   r  r   r   r   r  r   r  r  rA   rt   r&  r*  rC  r7   rL  r   rj  rn  rp  rr  rw  rz  r~  r  r  r  r  r   r~   __classcell__)r   s   @r   r   r     s    GK>B)VF9&1(
<!M;=:=-?)C.`	:DL*
P..
'""M+Zr   r   c                   `    e Zd ZdZddZd Zd Zd Zd Zd Z	d	 Z
d
 Zd Zd Zd Zd Zd Zy)AsyncWritera  Convenience wrapper for a writer object that might fail due to locking
    (i.e. the ``filedb`` writer). This object will attempt once to obtain the
    underlying writer, and if it's successful, will simply pass method calls on
    to it.

    If this object *can't* obtain a writer immediately, it will *buffer*
    delete, add, and update method calls in memory until you call ``commit()``.
    At that point, this object will start running in a separate thread, trying
    to obtain the writer over and over, and once it obtains it, "replay" all
    the buffered method calls on it.

    In a typical scenario where you're adding a single or a few documents to
    the index as the result of a Web transaction, this lets you just create the
    writer, add, and commit, without having to worry about index locks,
    retries, etc.

    For example, to get an aynchronous writer, instead of this:

    >>> writer = myindex.writer()

    Do this:

    >>> from whoosh.writing import AsyncWriter
    >>> writer = AsyncWriter(myindex)
    Nc                    t         j                  j                  |        d| _        || _        |xs i | _        || _        g | _        	  | j                  j                  di | j
                  | _        y# t        $ r
 d| _        Y yw xY w)aG  
        :param index: the :class:`whoosh.index.Index` to write to.
        :param delay: the delay (in seconds) between attempts to instantiate
            the actual writer.
        :param writerargs: an optional dictionary specifying keyword arguments
            to to be passed to the index's ``writer()`` method.
        FNr   )
	threadingThreadrK   runningrO  
writerargsr   eventsr   r   )rR   rO  r   r  s       r   rK   zAsyncWriter.__init__  sz     	!!$'
$*
	+$**++>doo>DK 	DK	s   +A4 4BBc                 6    | j                   j                         S r   )rO  rA   rs   s    r   rA   zAsyncWriter.reader  s    zz  ""r   c                 T    ddl m}  || j                         fd| j                  i|S Nr   r   	fromindexr   r   rA   rO  r   s      r   r   zAsyncWriter.searcher  s$    -FFvFFr   c                     | j                   r t        | j                   |      |i | y | j                  j                  |||f       y r   )r   getattrr  r3   )rR   methodargsrT   s       r   _recordzAsyncWriter._record  s<    ;;(GDKK($9&9KKf56r   c                 x   d| _         | j                  }|*	  | j                  j                  di | j                  }|*| j                  D ]  \  }}} t        ||      |i |   |j                  | j                  i | j                   y # t        $ r" t        j                  | j                         Y {w xY w)NTr   )r  r   rO  r  r   timesleepr   r  r  r   
commitargscommitkwargs)rR   r   r  r  rT   s        r   runzAsyncWriter.run  s    n'***=T__= n
 %)KK 	5 FD&#GFF#T4V4	5t<$*;*;<	  '

4::&'s   &B (B98B9c                 *    | j                  d||       y )Nr   r  rR   r  rT   s      r   r   zAsyncWriter.delete_document      &f5r   c                 *    | j                  d||       y )Nr   r  r  s      r   r   zAsyncWriter.add_document      ^T62r   c                 *    | j                  d||       y )Nr   r  r  s      r   r   zAsyncWriter.update_document  r  r   c                 *    | j                  d||       y )Nr   r  r  s      r   r   zAsyncWriter.add_field  s    [$/r   c                 *    | j                  d||       y )Nr   r  r  s      r   r   zAsyncWriter.remove_field  r  r   c                 *    | j                  d||       y )Nr   r  r  s      r   r   zAsyncWriter.delete_by_term  s    %tV4r   c                     | j                   r | j                   j                  |i | y ||c| _        | _        | j	                          y r   )r   r   r  r  startr  s      r   r   zAsyncWriter.commit  s<    ;;DKK//15v.DOT.JJLr   c                 V    | j                   r | j                   j                  |i | y y r   )r   r~   r  s      r   r~   zAsyncWriter.cancel  s'    ;;DKK// r   )g      ?N)r   r   r   r   rK   rA   r   r  r  r   r   r   r   r   r   r   r~   r   r   r   r  r    sI    4(#G7
=6360350r   r  c                 *   ddl m} ddlm} | j	                         }|j
                  }|j                  }|j                  }|D ]  }	|	j                         j                  }
 ||||	      }|	j                  ||
      }|j                  |      }|D ]J  }|j                  |       |j                  |      D ]  }|j                  |        |j                          L |j!                           |D ]  }d||   _         |r|j%                  d       yy)a  Adds spelling files to an existing index that was created without
    them, and modifies the schema so the given fields have the ``spelling``
    attribute. Only works on filedb indexes.

    >>> ix = index.open_dir("testindex")
    >>> add_spelling(ix, ["content", "tags"])

    :param ix: a :class:`whoosh.filedb.fileindex.FileIndex` object.
    :param fieldnames: a list of field names to create word graphs for.
    :param force: if True, overwrites existing word graph files. This is only
        useful for debugging.
    r   )fstr%   TF)r   N)whoosh.automatar  r0   r&   r   r5   r6   r"   r   FST_EXTrV   GraphWriterstart_fieldlexiconinsertfinish_fieldr8   spellingr   )r   rQ   r   r  r&   r   r5   r6   r"   rM   extrrY   gwr   rc  s                   r   add_spellingr    s    $,YY[FnnG]]FH mmo%%'673-__Q# 	INN9%		),  		$ OO		
 	
   *	%)y"* E" r   c                   x    e Zd ZdZ	 	 ddZd Zd Zd Zed        Z	d Z
d	 Zd
 ZddZd Zd Zd ZddZd Zy)BufferedWritera  Convenience class that acts like a writer but buffers added documents
    before dumping the buffered documents as a batch into the actual index.

    In scenarios where you are continuously adding single documents very
    rapidly (for example a web application where lots of users are adding
    content simultaneously), using a BufferedWriter is *much* faster than
    opening and committing a writer for each document you add. If you're adding
    batches of documents at a time, you can just use a regular writer.

    (This class may also be useful for batches of ``update_document`` calls. In
    a normal writer, ``update_document`` calls cannot update documents you've
    added *in that writer*. With ``BufferedWriter``, this will work.)

    To use this class, create it from your index and *keep it open*, sharing
    it between threads.

    >>> from whoosh.writing import BufferedWriter
    >>> writer = BufferedWriter(myindex, period=120, limit=20)
    >>> # Then you can use the writer to add and update documents
    >>> writer.add_document(...)
    >>> writer.add_document(...)
    >>> writer.add_document(...)
    >>> # Before the writer goes out of scope, call close() on it
    >>> writer.close()

    .. note::
        This object stores documents in memory and may keep an underlying
        writer open, so you must explicitly call the
        :meth:`~BufferedWriter.close` method on this object before it goes out
        of scope to release the write lock and make sure any uncommitted
        changes are saved.

    You can read/search the combination of the on-disk index and the
    buffered documents in memory by calling ``BufferedWriter.reader()`` or
    ``BufferedWriter.searcher()``. This allows quasi-real-time search, where
    documents are available for searching as soon as they are buffered in
    memory, before they are committed to disk.

    .. tip::
        By using a searcher from the shared writer, multiple *threads* can
        search the buffered documents. Of course, other *processes* will only
        see the documents that have been written to disk. If you want indexed
        documents to become available to other processes as soon as possible,
        you have to use a traditional writer instead of a ``BufferedWriter``.

    You can control how often the ``BufferedWriter`` flushes the in-memory
    index to disk using the ``period`` and ``limit`` arguments. ``period`` is
    the maximum number of seconds between commits. ``limit`` is the maximum
    number of additions to buffer between commits.

    You don't need to call ``commit()`` on the ``BufferedWriter`` manually.
    Doing so will just flush the buffered documents to disk early. You can
    continue to make changes after calling ``commit()``, and you can call
    ``commit()`` multiple times.
    Nc                    || _         || _        || _        |xs i | _        |xs i | _        t        j                         | _         | j                   j                  di | j                  | _        | j                          d| _
        | j                  rJt        j                  | j                  | j                        | _        | j                  j                          yy)a
  
        :param index: the :class:`whoosh.index.Index` to write to.
        :param period: the maximum amount of time (in seconds) between commits.
            Set this to ``0`` or ``None`` to not use a timer. Do not set this
            any lower than a few seconds.
        :param limit: the maximum number of documents to buffer before
            committing.
        :param writerargs: dictionary specifying keyword arguments to be passed
            to the index's ``writer()`` method when creating a writer.
        r   Nr   )rO  periodrN   r  r  r  RLockr   r   _make_ram_indexbufferedcountTimerr   timerr  )rR   rO  r  rN   r  r  s         r   rK   zBufferedWriter.__init__~  s     

$*$*OO%	'djj'':$//: ;;"dkkBDJJJ r   c                 $    | j                          y r   )r8   r   s       r   r   zBufferedWriter.__exit__  s    

r   c                 (    ddl m}  |       | _        y )Nr   )MemoryCodec)whoosh.codec.memoryr  r   )rR   r  s     r   r  zBufferedWriter._make_ram_index  s    3 ]
r   c                 L    | j                   j                  | j                        S r   )r   rA   r6   rs   s    r   _get_ram_readerzBufferedWriter._get_ram_reader  s    zz  --r   c                 .    | j                   j                  S r   )r   r6   rs   s    r   r6   zBufferedWriter.schema  s    {{!!!r   c                 "   ddl m} | j                  j                         }| j                  5  | j                         }d d d        j                         r-|j                         r |||g      }|S |j                  |       |S # 1 sw Y   HxY w)Nr   )MultiReader)	r0   r  r   rA   r   r  rj  	is_atomicr7   )rR   rT   r  rA   	ramreaders        r   rA   zBufferedWriter.reader  s    .##%YY 	/,,.I	/  !$fi%89  !!),	/ 	/s   BBc                 T    ddl m}  || j                         fd| j                  i|S r  r  r   s      r   r   zBufferedWriter.searcher  s$    -FFvFFr   c                 (    | j                  d       y )NF)restart)r   rs   s    r   r8   zBufferedWriter.close  s    E"r   c                    | j                   r| j                  j                          | j                  5  | j	                         }| j                          d d d        | j                  r| j                  j                          | j                  j                  di | j                   d| _        |r | j                  j                  di | j                  | _        | j                   rJt        j                  | j                   | j                        | _        | j                  j                          y y y # 1 sw Y   xY w)Nr   r   )r  r  r~   r   r  r  r  r   r7   r   r  rO  r  r  r  r  )rR   r  r  s      r   r   zBufferedWriter.commit  s    ;;JJYY 	#,,.I  "	# KK""9--T__-+$**++>doo>DK{{&__T[[$++F


  "  	# 	#s   !D55D>c                 Z    | j                   j                  |       | j                          y r   )r   r7   r   r   s     r   r7   zBufferedWriter.add_reader  s    v&r   c                 n   | j                   5  | j                  j                  | j                  j                        5 } |j                  di | d d d        | xj
                  dz  c_        | j
                  | j                  k\  r| j                          d d d        y # 1 sw Y   PxY w# 1 sw Y   y xY w)Nr/   r   )r   r   r   r6   r   r  rN   r   )rR   r   ws      r   r   zBufferedWriter.add_document  s    YY 	""4;;#5#56 )!(() !#!!TZZ/	 	) )	 	s#   0B+BAB+B(	$B++B4c                 r    | j                   5  t        j                  | fi | d d d        y # 1 sw Y   y xY wr   )r   rz   r   r   s     r   r   zBufferedWriter.update_document  s1    YY 	8''77	8 	8 	8s   -6c                    | j                   5  | j                  j                         }||k  r| j                  j	                  ||       n,| j
                  j                  }|j	                  ||z
  |       d d d        y # 1 sw Y   y xY w)Nr  )r   rO  r)   r   r   r   rM   )rR   r   r   r   
ramsegments        r   r   zBufferedWriter.delete_document  sx    YY 	I::++-D}++F6+B!ZZ//
**6D=*H	I 	I 	Is   A*B  B	c                     | j                   j                         }||k  r| j                  j                  |      S | j	                         j                  ||z
        S r   )rO  r)   r   r  r  )rR   r   r   s      r   r  zBufferedWriter.is_deleted  sN    zz'')D=;;))&11'')44Vd]CCr   )<   
   NNr   )r   r   r   r   rK   r   r  r  propertyr6   rA   r   r8   r   r7   r   r   r   r  r   r   r   r  r  E  si    6p ?C <#
. " " G
##&
8IDr   r  r   ),
__future__r   r  r  bisectr   
contextlibr   whooshr   whoosh.compatr   r   whoosh.externalsortr	   whoosh.fieldsr   r!  r   whoosh.systemr   whoosh.utilr   r   whoosh.util.filelockr   whoosh.util.textr   r  r   r   r#   rB   rD   rF   rH   objectrz   r   r  r  r  r  BatchWriterr   r   r   <module>r     s   8 &   %  4 + + " $ ( ( '
	I 	  !H
1+ 1l|& |B
rK rnc0)""K c0P'#XrD[ rDl r   