
    =wg                     Z    d Z ddlZddlmZ  G d de      ZdZ ee      j                  Zy)a  This module contains an object that implements the Paice-Husk stemming
algorithm.

If you just want to use the standard Paice-Husk stemming rules, use the
module's ``stem()`` function::

    stemmed_word = stem(word)

If you want to use a custom rule set, read the rules into a string where the
rules are separated by newlines, and instantiate the object with the string,
then use the object's stem method to stem words::

    stemmer = PaiceHuskStemmer(my_rules_string)
    stemmed_word = stemmer.stem(word)
    N)defaultdictc                       e Zd ZdZ ej
                  dej                  ej                  z        Z ej
                  dej                        Z	d Z
d Zd Zd Zd Zy	)
PaiceHuskStemmerz2Implements the Paice-Husk stemming algorithm.
    zg
    ^(?P<ending>\w+)
    (?P<intact>[*]?)
    (?P<num>\d+)
    (?P<append>\w*)
    (?P<cont>[.>])
    z^\w+c                 N    t        t              | _        | j                  |       y)zi
        :param ruletable: a string containing the rule data, separated
            by newlines.
        N)r   listrules
read_rules)self	ruletables     L/var/www/horilla/myenv/lib/python3.12/site-packages/whoosh/lang/paicehusk.py__init__zPaiceHuskStemmer.__init__#   s    
 !&
	"    c                    | j                   }| j                  }|j                  d      D ]  }|j                         }|s|j	                  |      }|r|j                  d      d d d   }|d   }|j                  d      dk(  }t        |j                  d            }	|j                  d      }
|j                  d      d	k(  }||   j                  |||	|
|f       t        d
|z         y )N
endingintact*numappendcont>zBad rule: %r)		rule_exprr   splitstripmatchgroupintr   	Exception)r
   r   r   r   liner   r   lastcharr   r   r   r   s               r   r	   zPaiceHuskStemmer.read_rules+   s    NN	

OOD) 	7D::<DOOD)EX.tt4!":X.#5%++e,-X.{{6*c1h&&VT'JK 566!	7r   c                     t        dD cg c]  }|j                  |       c}D cg c]	  }|dkD  r| c}      }|j                  d      }|dkD  r||k  r|S |S c c}w c c}w )Naeiour   yr   )minfind)r
   wordvpvpyps         r   first_vowelzPaiceHuskStemmer.first_vowelA   sh    G<qdiil< V   YYs^6b2gI	 = s
   AA c                 V    dD ]#  }|j                  |      s|t        |      d  c S  |S )N)	kilomicromilliintraultramegananopicopseudo)
startswithlen)r
   r'   prefixs      r   strip_prefixzPaiceHuskStemmer.strip_prefixI   s7    1 	*Fv&CKL))	* r   c                    | j                   }| j                  j                  |      }|s|S | j                  |j	                  d            }d}d}|r| j                  |      }|j                  |d         }|s	 |S d}|D ]^  \  }	}
}}}|j                  |	      s|
r|s t        |      |z
  t        |      z   }|dk(  r|dk  s
|dkD  r|dk  rOd}|dd|z
   |z   }|} n |r|S )z:Returns a stemmed version of the argument string.
        r   Tr   F      N)	r   	stem_exprr   r:   r   r,   getendswithr8   )r
   r'   r   r   stem	is_intact
continuingpfvrulelistr   r   r   r   r   newlens                  r   rA   zPaiceHuskStemmer.stemP   s    

$$T*K  Q0	
""4(Cyyb*H, ) J5= 1VT==(i  Y_s6{:Ffqj!G
 % %IS>F2D!%J# 4 r   N)__name__
__module____qualname____doc__recompileUNICODEVERBOSEr   r>   r   r	   r,   r:   rA    r   r   r   r      sa     

  
bjj	 "I 

62::.I#7,&r   r   a  
ai*2.     { -ia > -   if intact }
a*1.      { -a > -    if intact }
bb1.      { -bb > -b   }
city3s.   { -ytic > -ys }
ci2>      { -ic > -    }
cn1t>     { -nc > -nt  }
dd1.      { -dd > -d   }
dei3y>    { -ied > -y  }
deec2ss.  { -ceed > -cess }
dee1.     { -eed > -ee }
de2>      { -ed > -    }
dooh4>    { -hood > -  }
e1>       { -e > -     }
feil1v.   { -lief > -liev }
fi2>      { -if > -    }
gni3>     { -ing > -   }
gai3y.    { -iag > -y  }
ga2>      { -ag > -    }
gg1.      { -gg > -g   }
ht*2.     { -th > -   if intact }
hsiug5ct. { -guish > -ct }
hsi3>     { -ish > -   }
i*1.      { -i > -    if intact }
i1y>      { -i > -y    }
ji1d.     { -ij > -id   --  see nois4j> & vis3j> }
juf1s.    { -fuj > -fus }
ju1d.     { -uj > -ud  }
jo1d.     { -oj > -od  }
jeh1r.    { -hej > -her }
jrev1t.   { -verj > -vert }
jsim2t.   { -misj > -mit }
jn1d.     { -nj > -nd  }
j1s.      { -j > -s    }
lbaifi6.  { -ifiabl > - }
lbai4y.   { -iabl > -y }
lba3>     { -abl > -   }
lbi3.     { -ibl > -   }
lib2l>    { -bil > -bl }
lc1.      { -cl > c    }
lufi4y.   { -iful > -y }
luf3>     { -ful > -   }
lu2.      { -ul > -    }
lai3>     { -ial > -   }
lau3>     { -ual > -   }
la2>      { -al > -    }
ll1.      { -ll > -l   }
mui3.     { -ium > -   }
mu*2.     { -um > -   if intact }
msi3>     { -ism > -   }
mm1.      { -mm > -m   }
nois4j>   { -sion > -j }
noix4ct.  { -xion > -ct }
noi3>     { -ion > -   }
nai3>     { -ian > -   }
na2>      { -an > -    }
nee0.     { protect  -een }
ne2>      { -en > -    }
nn1.      { -nn > -n   }
pihs4>    { -ship > -  }
pp1.      { -pp > -p   }
re2>      { -er > -    }
rae0.     { protect  -ear }
ra2.      { -ar > -    }
ro2>      { -or > -    }
ru2>      { -ur > -    }
rr1.      { -rr > -r   }
rt1>      { -tr > -t   }
rei3y>    { -ier > -y  }
sei3y>    { -ies > -y  }
sis2.     { -sis > -s  }
si2>      { -is > -    }
ssen4>    { -ness > -  }
ss0.      { protect  -ss }
suo3>     { -ous > -   }
su*2.     { -us > -   if intact }
s*1>      { -s > -    if intact }
s0.       { -s > -s    }
tacilp4y. { -plicat > -ply }
ta2>      { -at > -    }
tnem4>    { -ment > -  }
tne3>     { -ent > -   }
tna3>     { -ant > -   }
tpir2b.   { -ript > -rib }
tpro2b.   { -orpt > -orb }
tcud1.    { -duct > -duc }
tpmus2.   { -sumpt > -sum }
tpec2iv.  { -cept > -ceiv }
tulo2v.   { -olut > -olv }
tsis0.    { protect  -sist }
tsi3>     { -ist > -   }
tt1.      { -tt > -t   }
uqi3.     { -iqu > -   }
ugo1.     { -ogu > -og }
vis3j>    { -siv > -j  }
vie0.     { protect  -eiv }
vi2>      { -iv > -    }
ylb1>     { -bly > -bl }
yli3y>    { -ily > -y  }
ylp0.     { protect  -ply }
yl2>      { -ly > -    }
ygo1.     { -ogy > -og }
yhp1.     { -phy > -ph }
ymo1.     { -omy > -om }
ypo1.     { -opy > -op }
yti3>     { -ity > -   }
yte3>     { -ety > -   }
ytl2.     { -lty > -l  }
yrtsi5.   { -istry > - }
yra3>     { -ary > -   }
yro3>     { -ory > -   }
yfi3.     { -ify > -   }
ycn2t>    { -ncy > -nt }
yca3>     { -acy > -   }
zi2>      { -iz > -    }
zy1s.     { -yz > -ys  }
)rJ   rK   collectionsr   objectr   defaultrulesrA   rO   r   r   <module>rS      s<     
 #av aJtp %**r   