o $aD@sUdZddlZddlZddlZddlmZddlmZddlm Z m Z m Z m Z m Z mZmZmZmZddlmZddlmZddlmZmZdd lmZdd lmZdd lmZGd d d Zddl m!Z!de"dee"fddZ#iddddde!ddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3Z$e e"e fe%d4<Gd5d6d6Z&e&Z'Gd7d8d8ej(Z)Gd9d:d:Z*dS);z sphinx.search ~~~~~~~~~~~~~ Create a full-text search index for offline search. :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS. :license: BSD, see LICENSE for details. N) import_module)path) IOAnyDictIterableListOptionalSetTupleType)nodes)Node)addnodes package_dir)BuildEnvironment) splitter_code)jsdumpc@seZdZUdZdZeed<dZeed<eZ e eed<dZ eed<dZ eed<dZ ed Zd ed dfd d Zd ed dfddZded eefddZded efddZded efddZdS)SearchLanguagea< This class is the base class for search natural language preprocessors. If you want to add support for a new language, you should override the methods of this class. You should override `lang` class property too (e.g. 'en', 'fr' and so on). .. attribute:: stopwords This is a set of stop words of the target language. Default `stopwords` is empty. This word is used for building index and embedded in JS. .. attribute:: js_splitter_code Return splitter function of JavaScript version. The function should be named as ``splitQuery``. And it should take a string and return list of strings. .. versionadded:: 3.0 .. attribute:: js_stemmer_code Return stemmer class of JavaScript version. This class' name should be ``Stemmer`` and this class must have ``stemWord`` method. This string is embedded as-is in searchtools.js. This class is used to preprocess search word which Sphinx HTML readers type, before searching index. Default implementation does nothing. Nlang language_name stopwordsjs_splitter_codejs_stemmer_rawcodez /** * Dummy stemmer for languages without stemming rules. */ var Stemmer = function() { this.stemWord = function(w) { return w; } } z(?u)\w+optionsreturncCs||_||dSN)rinitselfrr 8/usr/lib/python3/dist-packages/sphinx/search/__init__.py__init__JszSearchLanguage.__init__cCsdS)zK Initialize the class with the options the user has given. Nr rr r r!rNszSearchLanguage.initinputcCs |j|S)z This method splits a sentence into words. Default splitter splits input at white spaces, which should be enough for most languages except CJK languages. )_word_refindall)rr#r r r!splitSs zSearchLanguage.splitwordcCs|S)a This method implements stemming algorithm of the Python version. Default implementation does nothing. You should implement this if the language has any stemming rules. This class is used to preprocess search words before registering them in the search index. The stemming of the Python version and the JS version (given in the js_stemmer_code attribute) must be compatible. r rr'r r r!stem[s zSearchLanguage.stemcCsTt|dkp)t|dkrdt|dkodknp(t|ddko(||jv S)z Return true if the target word should be registered in the search index. This method is called after stemming. riA0i0)lenordrr(r r r! word_filterhs ,zSearchLanguage.word_filter)__name__ __module__ __qualname____doc__rstr__annotations__rsetrr rrjs_stemmer_coderecompiler$rr"rrr&r)boolr.r r r r!rs       r) SearchEnglishsourcercCs4t}|D]}|dd}||q|S)zs Parse snowball style word list like this: * http://snowball.tartarus.org/algorithms/finnish/stop.txt |r)r5 splitlinesr&update)r;resultliner r r!parse_stop_wordys  rAdazsphinx.search.da.SearchDanishdezsphinx.search.de.SearchGermaneneszsphinx.search.es.SearchSpanishfizsphinx.search.fi.SearchFinnishfrzsphinx.search.fr.SearchFrenchhuz sphinx.search.hu.SearchHungarianitzsphinx.search.it.SearchItalianjazsphinx.search.ja.SearchJapanesenlzsphinx.search.nl.SearchDutchnoz sphinx.search.no.SearchNorwegianptz!sphinx.search.pt.SearchPortugueserozsphinx.search.ro.SearchRomanianruzsphinx.search.ru.SearchRussiansvzsphinx.search.sv.SearchSwedishtrzsphinx.search.tr.SearchTurkishzhzsphinx.search.zh.SearchChinese languagesc@sdeZdZdZdZdZdedefddZdedefd d Z ded e dd fd dZ d e defddZ d S)_JavaScriptIndexz The search index as JavaScript file that calls a function on the documentation search object to register the index. zSearch.setIndex()datarcCs|jt||jSr)PREFIXrdumpsSUFFIX)rrVr r r!rXsz_JavaScriptIndex.dumpsscCsH|t|jt|j }|r||jr||jstdt|S)Nz invalid data)r,rWrY startswithendswith ValueErrorrloads)rrZrVr r r!r^s   z_JavaScriptIndex.loadsfNcCs|||dSr)writerX)rrVr_r r r!dumpz_JavaScriptIndex.dumpcCs||Sr)r^read)rr_r r r!loadsz_JavaScriptIndex.load) r/r0r1r2rWrYrr3rXr^rrardr r r r!rTsrTcsVeZdZdZdejdeddffdd Zdej de fd d Z de ddfd d Z ZS) WordCollectorzG A special visitor that collects words for the `IndexBuilder`. documentrrNcs"t|g|_g|_||_dSr)superr" found_wordsfound_title_wordsr)rrfr __class__r r!r"s  zWordCollector.__init__nodecCsDt|tjr |ddkr |d}|durdS||jjkr dSdS)NnamekeywordsrTF) isinstancermetagetr)rrl meta_langr r r!is_meta_keywordss  zWordCollector.is_meta_keywordscCst|tjr tjt|tjr=d|ddvr:tdd| }tdd|}tdd|}|j |j |tjt|tj rQ|j |j | dSt|tjre|j |j | dSt|tjr||r|d}dd |d D}|j |dSdSdS) Nhtmlformatz(?is)z(?is)z<[^<]+?>contentcSsg|]}|qSr )strip).0keywordr r r! z0WordCollector.dispatch_visit..,)ror commentSkipNoderawrqr&r7subastextrhextendrTexttitlerirrprs)rrlnodetextrnr r r!dispatch_visits$    zWordCollector.dispatch_visit)r/r0r1r2r rfrr"rrpr9rsrr __classcell__r r rjr!res  rec @sleZdZdZeedZdedede deddf d d Z d e d e ddfd dZ d e d e ddfddZde eefde eeeeeeeefffddZde dee eeefe eeefffddZde ee ffddZdefddZdeeddfddZdededed ejddf d!d"Zde ee ffd#d$Zdeefd%d&Zdeefd'd(Zdefd)d*ZdS)+ IndexBuilderzi Helper class that creates a search index based on the doctrees passed to the `feed` method. )rpickleenvrrscoringrNc Cs||_i|_i|_i|_i|_i|_i|_i|_t |}|dur/d|vr/t | dd}|dur9t ||_ nt |trS|dd\}}tt||}|||_ n|||_ |rwt|d}||_Wdn1sqwYnd|_t|_dS)N_r.rbrv)r_titles _filenames_mapping_title_mapping _stem_cache _objtypes _objnamesrSrqr&r:rror3rsplitgetattrropenrcdecodejs_scorer_coderr) rrrrr lang_classmodule classnamefpr r r!r"s2       zIndexBuilder.__init__streamrucst|tr |j|}||}t|tr|d|jjkr!td|dtt |d|_ tt |d|_ dt tt fdt tttfffdd }||d |_||d |_d S) zReconstruct from frozen data. envversionz old formatdocnames filenamestitlesmappingrcsJi}|D]\}}t|tr|h||<qfdd|D||<q|S)Ncsh|]}|qSr r )ryiindex2fnr r! %r|z8IndexBuilder.load..load_terms..)itemsroint)rrvkvrr r! load_termss  z%IndexBuilder.load..load_termsterms titletermsN)ror3formatsrddictrqrversionr]ziprrrrr rr)rrrufrozenrr rr!rds    * zIndexBuilder.loadcCs(t|tr |j|}|||dS)z"Dump the frozen index to a stream.N)ror3rrafreeze)rrrur r r!ra,s  zIndexBuilder.dumpfn2indexc Cs@i}|j}|j}t|jjD]\}}t|D]\}}} } } } | |vr'q| dkr,qt|}t|}| d\} }}| | g}z||| f}Wn/t y{t |}|||| f<|j | }|rr|| t||f||<n|| | f||<Ynw| |krd}n | | d|krd}n| }||| || ||fqq|S)Nrrrv-)rrsortedrdomainsr get_objectsrtescape rpartition setdefaultKeyErrorr, object_typesrqr3 get_type_nameappend)rrrotypesonames domainnamedomainfullnamedispnametypedocnameanchorprioprefixrrmplist typeindexotype shortanchorr r r!r2sF          zIndexBuilder.get_objectscs|iif}t||j|jfD].\}}|D]%\}}t|dkr-|\}|vr,|||<qtfdd|D||<qq |S)Nrcsg|] }|vr|qSr r )ryfnrr r!r{`sz*IndexBuilder.get_terms..)rrrrr,r)rrrvsrrrrrr rr! get_termsWs  zIndexBuilder.get_termsc sttj\}}fdd|D}ddt|D}|\}}|}ddjD}j} t ||||||| |j j d S)z/Create a usable data structure for serializing.csg|]}j|qSr )rrq)ryrrr r!r{fsz'IndexBuilder.freeze..cSsi|]\}}||qSr r )ryrr_r r r! gsz'IndexBuilder.freeze..cSs&i|]\}}||dd|dqS)r:rr )ryrrr r r!rks&) rrrrobjectsobjtypesobjnamesrr) rrrr enumeraterrrrrrr) rrrrrr title_termsrrrr rr!rcs  zIndexBuilder.freezecCsd|jj|jjfS)Nz %s (code: %s))rrrr r r!labelqrbzIndexBuilder.labelrcCs|i}i}|D]}||jvr|j|||<|j|||<q||_||_|jD]}||q'|jD]}||q4dS)z-Remove data for all docnames not in the list.N)rrrvaluesintersection_updater)rr new_titles new_filenamesr wordnamesr r r!prunets   zIndexBuilder.prunerfilenamerdoctreec s|j|<|j|<t|j}||dtdtffdd }jj}|jD]%}||} || r=j | t  |q'||rLj |t  |q'|j D]+}||} || s`||r`|} |j | t v} || r{| s{j | t  |qPdS)zFeed a doctree to the index.r'rcs@zj|WStyj|j|<j|YSwr)rrrr)lower)r'rr r!r)s   zIndexBuilder.feed..stemN)rrrerwalkr3r.rirrr5addrhrqr) rrrrrvisitorr)_filterr' stemmed_wordalready_indexedr rr!feeds,       zIndexBuilder.feedcCs:|jjr |jj}n|j}|tt|jj|j|dS)N)search_language_stemming_codesearch_language_stop_wordssearch_scorer_toolsearch_word_splitter_code)rrget_js_stemmer_coderrXrrr)rrr r r!context_for_searchtools z#IndexBuilder.context_for_searchtoolcCs"|jjrddd|jjfDSgS)z8Returns a list of non-minified stemmer JS files to copy.cSsg|] }ttdd|qS)searchznon-minified-js)rjoinr)ryfnamer r r!r{sz8IndexBuilder.get_js_stemmer_rawcodes..base-stemmer.js)rrrr r r!get_js_stemmer_rawcodess  z$IndexBuilder.get_js_stemmer_rawcodescCsdSrr rr r r!get_js_stemmer_rawcodesz#IndexBuilder.get_js_stemmer_rawcodecCs|jjrNttdd}tt|d }|}Wdn1s"wYtt||jj }|}Wdn1s@wYd|||jjfS|jjS)zsn   ,      \     ,