
    =wg!                     `    d Z ddlmZ ddlmZmZ ddlmZmZm	Z	 d Z
d Zd Z G d d	e      Zy
)a  This module contains low-level functions and a high-level class for parsing
the prolog file "wn_s.pl" from the WordNet prolog download
into an object suitable for looking up synonyms and performing query expansion.

http://wordnetcode.princeton.edu/3.0/WNprolog-3.0.tar.gz
    )defaultdict)iterkeys	text_type)SchemaIDSTOREDc                    t        t              }t        t              }| D ]  }|j                  d      s|dd }t        |d|j	                  d             }|j	                  d      }||dz   d }|j	                  d      }|d| j                         }|j                         s||   j                  |       ||   j                  |        ||fS )zfParses the WordNet wn_s.pl prolog file and returns two dictionaries:
    word2nums and num2words.
    zs(   N,'   )r   list
startswithintfindlowerisalphaappend)f	word2nums	num2wordslinenumqtwords          J/var/www/horilla/myenv/lib/python3.12/site-packages/whoosh/lang/wordnet.py
parse_filer   )   s    
 D!ID!I $t$ABx$		#'(YYs^BFG}YYs^CRy ||~$s##d#$" i    c                    t        t        t              }| j                  ||      }|j	                         }t        |      D ]+  }t        |||      }|j                  t        |      |       - |j                          |S )zCreates a Whoosh index in the given storage object containing
    synonyms taken from word2nums and num2words. Returns the Index
    object.
    )r   syns	indexname)
r   r   r   create_indexwriterr   synonymsadd_documentr   commit)	storager"   r   r   schemaixwr   r    s	            r   
make_indexr,   E   sv     &)F			f			:B
		A# 8	9d3	IdO$78 HHJIr   c                     | |   }t               }|D ]  }|j                  ||         } ||v r|j                  |       t        |      S )zzUses the word2nums and num2words dicts to look up synonyms
    for the given word. Returns a list of synonym strings.
    )setunionremovesorted)r   r   r   keysr    keys         r   r%   r%   U   sT    
 T?D5D *zz)C.)* t|D$<r   c                   V    e Zd ZdZd Zed        Zed        Zed	d       Zd	dZ	d Z
y)
	Thesaurusa	  Represents the WordNet synonym database, either loaded into memory
    from the wn_s.pl Prolog file, or stored on disk in a Whoosh index.

    This class allows you to parse the prolog file "wn_s.pl" from the WordNet prolog
    download into an object suitable for looking up synonyms and performing query
    expansion.

    http://wordnetcode.princeton.edu/3.0/WNprolog-3.0.tar.gz

    To load a Thesaurus object from the wn_s.pl file...

    >>> t = Thesaurus.from_filename("wn_s.pl")

    To save the in-memory Thesaurus to a Whoosh index...

    >>> from whoosh.filedb.filestore import FileStorage
    >>> fs = FileStorage("index")
    >>> t.to_storage(fs)

    To load a Thesaurus object from a Whoosh index...

    >>> t = Thesaurus.from_storage(fs)

    The Thesaurus object is thus usable in two ways:

    * Parse the wn_s.pl file into memory (Thesaurus.from_*) and then look up
      synonyms in memory. This has a startup cost for parsing the file, and uses
      quite a bit of memory to store two large dictionaries, however synonym
      look-ups are very fast.

    * Parse the wn_s.pl file into memory (Thesaurus.from_filename) then save it to
      an index (to_storage). From then on, open the thesaurus from the saved
      index (Thesaurus.from_storage). This has a large cost for storing the index,
      but after that it is faster to open the Thesaurus (than re-parsing the file)
      but slightly slower to look up synonyms.

    Here are timings for various tasks on my (fast) Windows machine, which might
    give an idea of relative costs for in-memory vs. on-disk.

    ================================================ ================
    Task                                             Approx. time (s)
    ================================================ ================
    Parsing the wn_s.pl file                         1.045
    Saving to an on-disk index                       13.084
    Loading from an on-disk index                    0.082
    Look up synonyms for "light" (in memory)         0.0011
    Look up synonyms for "light" (loaded from disk)  0.0028
    ================================================ ================

    Basically, if you can afford spending the memory necessary to parse the
    Thesaurus and then cache it, it's faster. Otherwise, use an on-disk index.
    c                 .    d | _         d | _        d | _        y )N)w2nn2wsearcher)selfs    r   __init__zThesaurus.__init__   s    r   c                 D     |        }t        |      \  |_        |_        |S )a  Creates a Thesaurus object from the given file-like object, which should
        contain the WordNet wn_s.pl file.

        >>> f = open("wn_s.pl")
        >>> t = Thesaurus.from_file(f)
        >>> t.synonyms("hail")
        ['acclaim', 'come', 'herald']
        )r   r7   r8   )clsfileobjthess      r   	from_filezThesaurus.from_file   s#     u'0$(r   c                     t        |d      }	 | j                  |      |j                          S # |j                          w xY w)zCreates a Thesaurus object from the given filename, which should
        contain the WordNet wn_s.pl file.

        >>> t = Thesaurus.from_filename("wn_s.pl")
        >>> t.synonyms("hail")
        ['acclaim', 'come', 'herald']
        rb)openr@   close)r=   filenamer   s      r   from_filenamezThesaurus.from_filename   s4     4 	==#GGIAGGIs	   / Ac                 b     |        }|j                  |      }|j                         |_        |S )aJ  Creates a Thesaurus object from the given storage object,
        which should contain an index created by Thesaurus.to_storage().

        >>> from whoosh.filedb.filestore import FileStorage
        >>> fs = FileStorage("index")
        >>> t = Thesaurus.from_storage(fs)
        >>> t.synonyms("hail")
        ['acclaim', 'come', 'herald']

        :param storage: A :class:`whoosh.store.Storage` object from
            which to load the index.
        :param indexname: A name for the index. This allows you to
            store multiple indexes in the same storage object.
        r!   )
open_indexr9   )r=   r(   r"   r?   indexs        r   from_storagezThesaurus.from_storage   s1    " u""Y"7(r   c                     | j                   r| j                  st        d      t        ||| j                   | j                         y)a  Creates am index in the given storage object from the
        synonyms loaded from a WordNet file.

        >>> from whoosh.filedb.filestore import FileStorage
        >>> fs = FileStorage("index")
        >>> t = Thesaurus.from_filename("wn_s.pl")
        >>> t.to_storage(fs)

        :param storage: A :class:`whoosh.store.Storage` object in
            which to save the index.
        :param indexname: A name for the index. This allows you to
            store multiple indexes in the same storage object.
        zNo synonyms loadedN)r7   r8   	Exceptionr,   )r:   r(   r"   s      r   
to_storagezThesaurus.to_storage   s3     xxtxx0117Itxx:r   c                     |j                         }| j                  r| j                  j                  |      d   S t        | j                  | j
                  |      S )zReturns a list of synonyms for the given word.

        >>> thesaurus.synonyms("hail")
        ['acclaim', 'come', 'herald']
        )r   r    )r   r9   documentr%   r7   r8   )r:   r   s     r   r%   zThesaurus.synonyms   sJ     zz|====))t)4V<<DHHdhh55r   N)THES)__name__
__module____qualname____doc__r;   classmethodr@   rF   rJ   rM   r%    r   r   r5   r5   d   sS    3j
      *;&6r   r5   N)rT   collectionsr   whoosh.compatr   r   whoosh.fieldsr   r   r   r   r,   r%   objectr5   rV   r   r   <module>r[      s5   8 $ - , , 8 N6 N6r   