o eZ@sddlZddlZddlZddlmZddlmZddlmZm Z zddlm Z Wne y7ddlm Z YnwzddlmZWn e yKdZYnwddlmZdd lmZgd ZejZd ed <d]ddZ ddZedZd^ddZddZdZdZdZe d_iddddddddddd dd!dd"dd#dd$dd%dd&dd'dd(dd)dd*dd+dd,dd-dd.dd/dd0ed1ed2ed3ed4eZ!de!d5<d6d7Z"d8d9Z#d:d;Z$Gdd?d?e&Z'Gd@dAdAe'Z(GdBdCdCe%Z)e)Z*GdDdEdEe)Z+e+Z,ZGdFdGdGe%Z-GdHdIdIe%Z.GdJdKdKeZ/GdLdMdMe/Z0GdNdOdOe/Z1d]dPdQZ2d`dRdSZ3dadTdUZ4dVdWZ5e6dXdYe7dZDZ8d[d\Z9dS)bN)entities) HTMLParser)Markupescape)soft_str) soft_unicode) LazyProxy) TracError) to_unicode) DeuglifierFormTokenInjectorTracHTMLSanitizerr find_elementhtmlis_safe_origin plaintexttag to_fragment stripentities striptagsvalid_html_bytesunescape'aposTcCstt|tr|St|trt|St|}|r&d|vr|Stt|ddSd|vr,|Stt|ddddS)a3Create a Markup instance from a string and escape special characters it may contain (<, >, & and "). :param text: the string to escape; if not a string, it is assumed that the input can be converted to a string :param quotes: if ``True``, double quote characters are escaped in addition to the other special characters >>> escape('"1 < 2"') Markup('"1 < 2"') >>> escape(['"1 < 2"']) Markup("['"1 < 2"']") If the `quotes` parameter is set to `False`, the " character is left as is. Escaping quotes is generally only required for strings that are to be used in attribute values. >>> escape('"1 < 2"', quotes=False) Markup('"1 < 2"') >>> escape(['"1 < 2"'], quotes=False) Markup('[\'"1 < 2"\']') However, `escape` behaves slightly differently with `Markup` and `Fragment` behave instances, as they are passed through unmodified. >>> escape(Markup('"1 < 2 '"')) Markup('"1 < 2 '"') >>> escape(Markup('"1 < 2 '"'), quotes=False) Markup('"1 < 2 '"') >>> escape(tag.b('"1 < 2"')) Markup('"1 < 2"') >>> escape(tag.b('"1 < 2"'), quotes=False) Markup('"1 < 2"') :return: the escaped `Markup` string :rtype: `Markup` z''zz"") isinstancerFragment escape_quotesstrreplace)textquoteser$0/usr/lib/python3/dist-packages/trac/util/html.pyr3s - rcCs|sdSt|ts |S|S)aWReverse-escapes &, <, >, and " and returns a `str` object. >>> unescape(Markup('1 < 2')) '1 < 2' If the provided `text` object is not a `Markup` instance, it is returned unchanged. >>> unescape('1 < 2') '1 < 2' :param text: the text to unescape :return: the unescsaped string :rtype: `str` )rrrr!r$r$r%rns  rz-&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)Fcs*fdd}t|trt|}t||S)uReturn a copy of the given text with any character or numeric entities replaced by the equivalent UTF-8 characters. >>> stripentities('1 < 2') '1 < 2' >>> stripentities('more …') 'more …' >>> stripentities('…') '…' >>> stripentities('…') '…' >>> stripentities(Markup('…')) '…' If the `keepxmlentities` parameter is provided and is a truth value, the core XML entities (&, ', >, < and ") are left intact. >>> stripentities('1 < 2 …', keepxmlentities=True) '1 < 2 …' :return: a `str` instance with entities removed :rtype: `str` cs|dr%|d}|drt|ddd}t|St|d}t|S|d}r4|dvr4d|Sztt|WStyNrJd|YS|YSw) N)xX )amprgtltquotz&%s;z&%s;)group startswithintchr_name2codepointKeyError)matchrefkeepxmlentitiesr$r%_replace_entitys"        z&stripentities.._replace_entity)rrr_STRIPENTITIES_REsub)r!r;r<r$r:r%rs   rcCs t|S)aReturn a copy of the text with any XML/HTML tags removed. >>> striptags('Foo bar') 'Foo bar' >>> striptags('Foo') 'Foo' >>> striptags('Foo
') 'Foo' HTML/XML comments are stripped, too: >>> striptags('test') 'test' :param text: the string to remove tags from :return: a `str` instance with all tags removed :rtype: `str` )rrr'r$r$r%rs r)noyes)offon)falsetrue autofocusautoplaycheckedcontrolsdefaultdeferdisabledformnovalidatehiddenismaploopmultiplemuted novalidateopenreadonlyrequiredreversedscopedseamlessselectedcontenteditable draggable spellcheck translate autocompleteasynccCs|dkrt|trtdi|pd}n>> classes('foo', 'bar') 'foo bar' In addition, the names of any supplied keyword arguments are added if they have a truth value: >>> classes('foo', bar=True) 'foo bar' >>> classes('foo', bar=False) 'foo' >>> classes(foo=True, bar=True) 'bar foo' If none of the arguments are added to the list, this function returns `''`: >>> classes(bar=False) '' Nc3s|] }|r|VqdSNr$).0kkwargsr$r% szclasses.. )rdfilterextendsortedjoin)argsrprcr$ror%rcs rccOspi}g}td|D]}t|tr||q ||q |||ddt|dddDd|S)aHelper function for dynamically assembling a list of CSS style name and values in templates. Any positional arguments are added to the list of styles. All positional arguments must be strings or dicts: >>> styles('foo: bar', 'fu: baz', {'bottom-right': '1em'}) 'foo: bar; fu: baz; bottom-right: 1em' In addition, the names of any supplied keyword arguments are added if they have a string value: >>> styles('foo: bar', fu='baz') 'foo: bar; fu: baz' >>> styles('foo: bar', bar=False) 'foo: bar' If none of the arguments are added to the list, this function returns `''`: >>> styles(bar=False) '' Ncss$|] \}}|rd||fVqdS)z%s: %sNr$)rmrnvr$r$r%rqDs zstyles..cSs|dS)Nrr$)ir$r$r%Eszstyles..)rh; ) rsrrbupdateappendrtruitemsrv)rwrpdreargr$r$r%re#s      rec@sLeZdZdZdZddZddZddZd d Zd d Z d dZ ddZ dS)rz8A fragment represents a sequence of strings or elements.)childrencGsg|_|D]}||qdSrl)rr}selfrwrr$r$r%__init__Os zFragment.__init__cCs tt|Srl)rrrr$r$r%__html__Ts zFragment.__html__cCddd|jDS)Nr&css|]}t|dVqdS)FNrrmcr$r$r%rqXsz#Fragment.__str__..rvrrr$r$r%__str__WzFragment.__str__cCs t||Srlr)rotherr$r$r%__add__Z zFragment.__add__cGs|D]}||q|Srl)r}rr$r$r%__call__]s zFragment.__call__cCs|r2t|tttttfr|j|dSz |D]}||qWdSty1|j|YdSw|dkr>|jddSdS)Nr0) rrrbytesr4floatrr} TypeError)rreltr$r$r%r}bs  zFragment.appendcCr)Nr&css*|]}t|tr |nt|VqdSrl)rras_textrrr$r$r%rqqs"z#Fragment.as_text..rrr$r$r%rps zFragment.as_textN) __name__ __module__ __qualname____doc__ __slots__rrrrrr}rr$r$r$r%rJs rc@sHeZdZdZdZiZdZdZddZddZ d d Z d d Z d dZ dS) XMLElementzXAn element represents an XML element, with a tag name, attributes and content. )rattribr$z/>cOs<tj|g|Rt||_|r|||_dS|j|_dSrl)rrrr_dict_from_kwargs EMPTY_ATTRIBr)rrrwrpr$r$r%rs  zXMLElement.__init__cCs|Srlr$rrnrxr$r$r% _attr_valuezXMLElement._attr_valuecCshg}|D])\}}|dur/|dddkr|dd}|||}|dur/||t|fqt|S)N_)r~rr}rrb)rrpattrsrnrxr$r$r%rs  zXMLElement._dict_from_kwargscOsD|r||}|r|jr|j|n||_|D]}||q|Srl)rrr|r})rrwrprrr$r$r%rs  zXMLElement.__call__cCsd|j}|jr+g}t|jD]}|j|}|r!|d||fq|r+|d|7}|js7|jrI|j|jvrI|dt|d|jd7}|S||j 7}|S)N< %s="%s"r&>brhrcolimgwbrareabaselinkmetaembedinputparamtrackkeygensourcecommandz />r$cCs t||Srl)rkrr$r$r%rrzElement._attr_valueN)rrrrrrrrr$r$r$r%rs  rc@ eZdZdZddZddZdS)XMLElementFactoryzhAn XML element factory can be used to build Fragments and XMLElements for arbitrary tag names. cGst|Srlr)rrwr$r$r%rzXMLElementFactory.__call__cCt|Srl)rrrr$r$r% __getattr__rzXMLElementFactory.__getattr__N)rrrrrrr$r$r$r%rs rc@seZdZdZddZdS)ElementFactoryzaAn element factory can be used to build Fragments and Elements for arbitrary tag names. cCrrl)rrr$r$r%rrzElementFactory.__getattr__N)rrrrrr$r$r$r%rs rc@seZdZdZegdZegdZegdZegdZegdZ edgZ eeeee e fdd Z e d jZe d jZd d ZddZddZddZddZddZe djZe de jjZddZddZe djZdd Zd!S)"r aSanitize HTML constructions which are potentially vector of phishing or XSS attacks, in user-supplied HTML. The usual way to use the sanitizer is to call the `sanitize` method on some potentially unsafe HTML content. See also `genshi.HTMLSanitizer`_ from which the TracHTMLSanitizer has evolved. .. _genshi.HTMLSanitizer: http://genshi.edgewall.org/wiki/Documentation/filters.html#html-sanitizer )Gaabbracronymaddressrbbig blockquoterbuttoncaptioncentercitecodercolgroupdddeldfndirdivdldtemfieldsetfontformh1h2h3h4h5h6rryrrinskbdlabellegendlimapmenuoloptgroupoptionppreqssampselectsmallspanstrikestrongr>suptabletbodytdtextareatfootththeadtrttuulvar)Iracceptzaccept-charset accesskeyactionalignaltaxisbgcolorborder cellpadding cellspacingcharcharoffcharsetrGrr`clearcolscolspancolorcompactcoordsdatetimerrKenctypeforframeheadersheighthrefhreflanghspaceidrNrlanglongdesc maxlengthmediamethodrPnamenohrefnoshadenowrappromptrTrelrevrowsrowspanrulesscoperYshapesizersrcstartrasummarytabindextargettitletypeusemapvalignvaluevspacewidth)e backgroundzbackground-attachmentzbackground-colorzbackground-imagezbackground-positionzbackground-repeatr z border-bottomzborder-bottom-colorzborder-bottom-stylezborder-bottom-left-radiuszborder-bottom-right-radiuszborder-bottom-widthzborder-collapsez border-colorz border-leftzborder-left-colorzborder-left-stylezborder-left-widthz border-radiusz border-rightzborder-right-colorzborder-right-stylezborder-right-widthzborder-spacingz border-stylez border-topzborder-top-colorzborder-top-left-radiuszborder-top-right-radiuszborder-top-stylezborder-top-widthz border-widthbottomz caption-sidercliprcontentzcounter-incrementz counter-resetcursor directiondisplayz empty-cellsrrz font-familyz font-sizez font-stylez font-variantz font-weightrleftzletter-spacingz line-heightz list-stylezlist-style-imagezlist-style-positionzlist-style-typemarginz margin-bottomz margin-leftz margin-rightz margin-topz max-heightz max-widthz min-heightz min-widthopacityorphansoutlinez outline-colorz outline-stylez outline-widthoverflowpaddingzpadding-bottomz padding-leftz padding-rightz padding-topzpage-break-afterzpage-break-beforezpage-break-insidepositionr"rightz table-layoutz text-alignztext-decorationz text-indentztext-transformtopz unicode-bidizvertical-align visibilityz white-spacewidowsr?z word-spacingzz-index)fileftphttphttpsmailtoN)rr@dynsrcrlowsrcr4zdata:cCs(||_||_||_||_||_||_dS)zyNote: safe_schemes and safe_css have to remain the first parameters, for backward-compatibility purpose. N) safe_tags safe_attrssafe_css uri_attrs safe_schemes safe_origins)rr^r\rZr[r]r_r$r$r%r3s  zTracHTMLSanitizer.__init__uc[eEEe][xXXx][pPPp][rRʀRr][eEEe][sSSs]{2}[iIɪIi][oOOo][nNɴNn]u[Uu][Rrʀ][Llʟ]\s*\(([^)]+)cCs.t|t}|||t|jS)zTransforms the incoming HTML by removing anything's that deemed unsafe. :param html: the input HTML :type: str :return: the sanitized content :rtype: Markup )HTMLSanitizationioStringIOfeedcloseroutgetvalue)rr transformr$r$r%sanitizeds zTracHTMLSanitizer.sanitizecCs8||jvrdS|dkr|dkS|drd|vSdS)z|Determine whether the given css property declaration is to be considered safe for inclusion in the output. FrNstaticrH-T)r\lowerr3)rpropr=r$r$r% is_safe_cssss   zTracHTMLSanitizer.is_safe_csscCs&||jvrdS|dkrd|vrdSdS)aODetermine whether the given element should be considered safe for inclusion in the output. :param tag: the tag name of the element :type tag: str :param attrs: the element attributes :type attrs: list :return: whether the element should be considered safe :rtype: bool Fr)r:passwordT)rZrrrr$r$r% is_safe_elems zTracHTMLSanitizer.is_safe_elemcCsRd|vr |ddd}d|vrdSdd|dddD}d||jvS) a:Determine whether the given URI is to be considered safe for inclusion in the output. The default implementation checks whether the scheme of the URI is in the set of allowed URIs (`safe_schemes`). >>> sanitizer = TracHTMLSanitizer() >>> sanitizer.is_safe_uri('http://example.org/') True >>> sanitizer.is_safe_uri('javascript:alert(document.cookie)') False :param uri: the URI to check :return: `True` if the URI can be considered safe, `False` otherwise :rtype: `bool` #r(r:TcSsg|]}|r|qSr$)isalnum)rmrr$r$r% sz1TracHTMLSanitizer.is_safe_uri..r&)splitrvrkr^)ruricharsr$r$r% is_safe_uris zTracHTMLSanitizer.is_safe_uricCsi}|D]1\}}|dur|}||jvrq||jvr"||s!qn|dkr3||}|s.qd|}|||<q|dkrMd|vrM||dsMd}d||<|S)a!Remove potentially dangerous attributes and sanitize the style attribute . :param tag: the tag name of the element :type attrs: dict corresponding to tag attributes :return: a dict containing only safe or sanitized attributes :rtype: dict Nrar{rr4 crossorigin anonymous)r~r[r]rx sanitize_cssrv_is_safe_origin)rrr new_attrsattrr=declsr$r$r%sanitize_attrss,        z TracHTMLSanitizer.sanitize_attrsc sg}|}td|dD]D}|}|sqz |dd\}}Wn ty.Yqw||s>> sanitizer = TracHTMLSanitizer() >>> sanitizer.sanitize_css(''' ... background: url(javascript:alert("foo")); ... color: #000; ... ''') ['color: #000'] Also, the proprietary Internet Explorer function ``expression()`` is always stripped: >>> sanitizer.sanitize_css(''' ... background: #fff; ... color: #000; ... width: e/**/xpression(alert("F")); ... ''') ['background: #fff', 'color: #000', 'width: e xpression(alert("F"))'] :param text: the CSS text; this is expected to be `str` and to not contain any character or numeric references :return: a list of declarations that are considered safe :rtype: `list` N;rrr(c3s |] }|dVqdS)r(N)r|r2)rmr8rr$r%rqsz1TracHTMLSanitizer.sanitize_css..) _strip_css_comments_replace_unicode_escapesrsrustrip ValueErrorrmrk_EXPRESSION_SEARCHall _URL_FINDITERr})rr!rdeclrlr=r$rr%r{s*   zTracHTMLSanitizer.sanitize_cssz\r\nz8\\([0-9a-fA-F]{1,6})\s?|\\([^\r\n\f0-9a-fA-F'"{};:()#*])cCs||o t|j|Srl)rxrr_)rrvr$r$r%r|s  z!TracHTMLSanitizer._is_safe_origincCsdd}|||d|S)NcSsZ|d}|r t|d}t|}|dkrd}|S|dkrd}|S|d}|dkr+dS|S)Nr(r+rr\z\\r-)r2r4r5)r8trrr$r$r%_repls   z9TracHTMLSanitizer._replace_unicode_escapes.._repl )_UNICODE_ESCAPE_NORMALIZE_NEWLINES)rr!rr$r$r%rs z*TracHTMLSanitizer._replace_unicode_escapesz /\*.*?\*/cCs |d|S)zReplace comments with space character instead of superclass which removes comments to avoid problems when nested comments. rr) _CSS_COMMENTS)rr!r$r$r%rs z%TracHTMLSanitizer._strip_css_commentsN) rrrr frozenset SAFE_TAGS SAFE_ATTRSSAFE_CSS SAFE_SCHEMES URI_ATTRSSAFE_CROSS_ORIGINSrrecompilesearchrfinditerrrhrmrprxrr{r>rUNICODErr|rrrr$r$r$r%r sN     ! /  r c@s(eZdZdZddZddZddZdS) r aHelp base class used for cleaning up HTML riddled with ```` tags and replace them with appropriate ````. The subclass must define a `rules()` static method returning a list of regular expression fragments, each defining a capture group in which the name will be reused for the span's class. Two special group names, ``font`` and ``endfont`` are used to emit ```` and ````, respectively. cCs:t|}t|dstdd||_|j|_|S)N_compiled_rulesz(?:%s)|)object__new__hasattrrrrvr0r)clsrr$r$r%r0s  zDeuglifier.__new__cCst|j|j|Srl)rr>rr )rindatar$r$r%format7zDeuglifier.formatcCsF|D]\}}|r |dkrdS|dkrdSd|SqdS)Nrzendfontzz) groupdictr~)r fullmatchmtyper8r$r$r%r :s zDeuglifier.replaceN)rrrrrrr r$r$r$r%r $s   r c@XeZdZdZddZddZddZdd Zd d Zd d Z ddZ ddZ ddZ dS) HTMLTransformzConvenience base class for writing HTMLParsers. The default implementation of the HTMLParser ``handle_*`` methods do nothing, while in our case we try to rewrite the incoming document unmodified. cCsRt|||_t|tjrdd|_dSt|tjr"dd|_dSdd|_dS)NcSt|tr |dS|SNzutf-8)rrdecoderxr$r$r%rzQ z(HTMLTransform.__init__..cSrr)rrencoderr$r$r%rzTrcSs|Srlr$rr$r$r%rzWs)rrrerra TextIOBase_convertIOBase)rrer$r$r%rMs   zHTMLTransform.__init__cC||dSrl_writeget_starttag_textror$r$r%handle_starttagYrzHTMLTransform.handle_starttagcCrrlrror$r$r%handle_startendtag\rz HTMLTransform.handle_startendtagcC|d|dS)Nz rrdatar$r$r%handle_comment_rzHTMLTransform.handle_commentcCrNzrrr$r$r% handle_declbrzHTMLTransform.handle_declcCr)Nrrr$r$r% handle_pierzHTMLTransform.handle_picCs||dSrlrrr$r$r% handle_datahszHTMLTransform.handle_datacCs|d|ddSNrrrrr$r$r% handle_endtagkrzHTMLTransform.handle_endtagcCs|j||dSrl)rewriterrr$r$r%rnrzHTMLTransform._writeN) rrrrrrrrrrrrrr$r$r$r%rDs  rc@r)r zIdentify and protect forms from CSRF attacks. This filter works by adding a input type=hidden field to POST forms. cCst||||_dSrl)rrtoken)r form_tokenrer$r$r%rys  zFormTokenInjector.__init__cCsZt||||dkr)|D]\}}|dkr(|dkr(|d|jdSqdSdS)Nrr&postz5)rrrkrr)rrrr'r=r$r$r%r}s  z!FormTokenInjector.handle_starttagN)rrrrrrr$r$r$r%r rs r c@r)r`z-Sanitize parsed HTML using TracHTMLSanitizer.cCst||||_d|_dSrl)rr sanitizer waiting_for)rrrer$r$r%rs  zHTMLSanitization.__init__csh|jrdS|j||s||_dS|j|t|dfddtD}|d|||fdS)Nr&c3s$|] }d|t|fVqdS)rNr)rmr'r}r$r%rqsz1HTMLSanitization._handle_start..z<%s%s%s>)rrrprrbrvrur)rrrstartend html_attrsr$rr% _handle_startszHTMLSanitization._handle_startcC|js |||ddSdS)Nr&rrror$r$r%rz HTMLSanitization.handle_starttagcCrN/rror$r$r%rrz#HTMLSanitization.handle_startendtagcCsdSrlr$rr$r$r%rrzHTMLSanitization.handle_commentcCs|js |d|dSdSrrrrr$r$r%rrzHTMLSanitization.handle_declcCs$|js|d|dddSdS)Nrz?>r&)rrr rr$r$r%rszHTMLSanitization.handle_picCs|js |t|dSdSrl)rrrrr$r$r%rrzHTMLSanitization.handle_datacCs4|jr|j|kr d|_dSdS|d|ddSrrrr$r$r%rs   zHTMLSanitization.handle_endtagN) rrrrrrrrrrrrrr$r$r$r%r`s  r`cCs4t|tr |}ntt|}|s|dd}|S)a^Extract the text elements from (X)HTML content >>> plaintext('1 < 2') '1 < 2' >>> plaintext(tag('1 ', tag.b('<'), ' 2')) '1 < 2' >>> plaintext('''1 ... < ... 2''', keeplinebreaks=False) '1 < 2' :param text: `unicode` or `Fragment` :param keeplinebreaks: optionally keep linebreaks rrr)rrrrrr )r!keeplinebreaksr$r$r%rs    rcCst|tr,|dur||jvr|S|dur!||jddvr!|S|dur,||jkr,|St|trF|jD]}t||||}|durE|Sq4dSdS)zReturn the first element in the fragment having the given attribute, class or tag, using a preorder depth-first search. Nr`r&) rrrgetrurrrr)fragr~rrchildrr$r$r%rs   rcs|r d|vr |ds dStdd|DrdS|dr&|r&d|j|f}tdfdd }||}|D]+}||}||krDdS|drQ||rQdS||d rZ|n|d rbdSq7d S) z-Whether the given uri is a safe cross-origin.rrz//Tcss|]}|dkVqdS)*Nr$)rmsafer$r$r%rqsz!is_safe_origin..z%s:%sz&(?:[a-zA-Z][-a-zA-Z0-9+._]*:)?//[^/]+$cs|r |d7}|Sr)r8)rv normalize_rer$r% normalize_uris z%is_safe_origin..normalize_urirF)r3anyschemerrendswith)r_rvreqrrr$rr%rs&  rcCs|t|tst|tr't|jdkr'|jd}t|tst|tr't|jdkstr1t|tr1|j}t|tr8|Stt |S)z%Convert input to a `Fragment` object.r(r) rr Exceptionlenrwrr=rrr )rr$r$r%rs     rccs|] }|dvr|VqdS)) r, Nr$)rmryr$r$r%rqs rq cCs |dtS)zReturn only valid bytes in XML/HTML from the given data. >>> valid_html_bytes(b'blah') b'blah' >>> list(valid_html_bytes(bytes(range(33)) + b'')) [9, 10, 13, 32, 127] N)r]_invalid_control_chars)rr$r$r%rs r)T)Fr$)NNNrl):rarsysrr html.parserr markupsaferrrrr ImportError babel.supportr trac.corer trac.util.textr __all__name2codepointcopyr6rrr=rrNO_YESOFF_ON FALSE_TRUErbrfrkrcrerrrrrxmlrrr r rr r`rrrrrrangerrr$r$r$r%s        ;  0 %'+A  B . 3