o !b@@sdZddlmZddlZddlmZddlZddlmZ m Z ddl m Z mZmZmZddl mZmZmZmZmZmZmZmZmZmZmZddlmZmZgd Zd Z d d Z!Gd dde"Z#Gddde$Z%ddZ&Gddde j'e$Z'dddZ(ddZ)dS)zOSupport for constructing markup streams from files, strings, or other sources. )chainN)expat) html_entities html_parser)AttrsQNameStream stripentities) STARTENDXML_DECLDOCTYPETEXTSTART_NSEND_NS START_CDATA END_CDATAPICOMMENT)StringIOBytesIO)ET ParseError XMLParserXML HTMLParserHTMLzrestructuredtext enccst|jd}tdd|D}t||fdfV|jr&t|jdfV|D] }t|D]}|Vq.q(t |dfV|j rGt|j dfVdSdS)zConvert a given ElementTree element to a markup stream. :param element: an ElementTree element :return: a markup stream {cSs"g|] \}}t|d|fqS)r)rlstrip).0attrvaluer"./usr/lib/python3/dist-packages/genshi/input.py *szET..Nr&N) rtagrritemsr textrrr tail)elementtag_nameattrschilditemr"r"r#r#s   rc@seZdZdZdddZdS)rzWException raised when fatal syntax errors are found in the input being parsed. Nr&cCs<||_|r |d|7}t|||pd|_||_||_dS)a:Exception initializer. :param message: the error message from the parser :param filename: the path to the file that was parsed :param lineno: the number of the line on which the error was encountered :param offset: the column number where the error was encountered z, in zN)msg Exception__init__filenamelinenooffset)selfmessager3r4r5r"r"r#r2=s    zParseError.__init__r%)__name__ __module__ __qualname____doc__r2r"r"r"r#r8src@seZdZdZddejDZde dZ d-ddZ d d Z d d Z d dZd-ddZddZddZddZddZddZddZddZdd Zd!d"Zd#d$Zd%d&Zd'd(Zd)d*Zd+d,ZdS).raGenerator-based XML parser based on roughly equivalent code in Kid/ElementTree. The parsing is initiated by iterating over the parser object: >>> parser = XMLParser(StringIO('Foo')) >>> for kind, data, pos in parser: ... print('%s %s' % (kind, data)) START (QName('root'), Attrs([(QName('id'), '2')])) START (QName('child'), Attrs()) TEXT Foo END child END root cCsg|] \}}d||fqS)zr"rnamer!r"r"r#r$^zXMLParser. utf-8NcCs||_||_t|d}d|_t|drd|_d|_|j|_ |j |_ |j |_ |j|_|j|_|j|_|j|_|j|_|j|_|j|_|j|_|j|_| tj!|"|j#|_$||_g|_%dS)aInitialize the parser for the given XML input. :param source: the XML text as a file-like object :param filename: the name of the file, if appropriate :param encoding: the encoding of the file; if not specified, the encoding is assumed to be ASCII, UTF-8, or UTF-16, or whatever the encoding specified in the XML declaration (if any) }Treturns_unicodeN)&sourcer3r ParserCreate buffer_texthasattrrBordered_attributes _handle_startStartElementHandler _handle_endEndElementHandler _handle_dataCharacterDataHandler_handle_doctypeStartDoctypeDeclHandler_handle_start_nsStartNamespaceDeclHandler_handle_end_nsEndNamespaceDeclHandler_handle_start_cdataStartCdataSectionHandler_handle_end_cdataEndCdataSectionHandler _handle_piProcessingInstructionHandler_handle_xml_declXmlDeclHandler_handle_commentCommentHandler _handle_otherDefaultHandlerSetParamEntityParsingXML_PARAM_ENTITY_PARSING_ALWAYS UseForeignDTD_build_foreignExternalEntityRefHandler_queue)r6rCr3encodingparserr"r"r#r2bs0     zXMLParser.__init__cfdd}t|tS)zGenerator that parses the XML source, yielding markup events. :return: a markup event stream :raises ParseError: if the XML text is not well formed c 3szTd}d} |sDtjdkrDj|}|s)tdr&jdd`d}nt|tj r4| d}j|d|sDtjdksjD]}|VqGg_|rUWdSqtj yp}zt |}t |j|j|jd}~ww) NFrrTr@)lenrerCreadrFrParse isinstancesix text_typeencode ExpatErrorstrrr3r4r5)bufsizedonedataeventer0r6r"r# _generates6     z"XMLParser.parse.._generaterfilter _coalescer6r{r"rzr#parses zXMLParser.parsecC t|SNiterrrzr"r"r#__iter__ zXMLParser.__iter__cCs |j|}|t|jdS)Nrj)rExternalEntityParserCreate ParseFiler _external_dtd)r6contextbasesysidpubidrgr"r"r#rcs zXMLParser._build_foreigncCs~|dur|}|tur4d|vr!|}|dt|d}d}n |d}|dt|}|d||f}|j|||fdS)Nr?rjr&r)_getposr splitlinesrlreappend)r6kindrwposlinesr4r5r"r"r#_enqueueszXMLParser._enqueuecCs |jddfS)Nr&)r3rzr"r"r#_getpos_unknownrzXMLParser._getpos_unknowncCs|j|jj|jjfSr)r3rCurrentLineNumberCurrentColumnNumberrzr"r"r#rs zXMLParser._getposcCs8tddtt|gdD}|tt||fdS)NcSsg|] \}}t||fqSr")rr<r"r"r#r$r>z+XMLParser._handle_start..r)rziprrr r)r6r'attribr-r"r"r#rHszXMLParser._handle_startcCs|tt|dSr)rr r)r6r'r"r"r#rJzXMLParser._handle_endcC|t|dSrrrr6r)r"r"r#rLzXMLParser._handle_datacCs|t|||fdSr)rr )r6versionrf standaloner"r"r#rZzXMLParser._handle_xml_declcCs|t|||fdSr)rr )r6r=rrhas_internal_subsetr"r"r#rNrzXMLParser._handle_doctypecCs|t|pd|fdSNrk)rr)r6prefixurir"r"r#rPszXMLParser._handle_start_nscCs|t|pddSr)rr)r6rr"r"r#rRrzXMLParser._handle_end_nscC|tdSr)rrrzr"r"r#rTzXMLParser._handle_start_cdatacCrr)rrrzr"r"r#rVrzXMLParser._handle_end_cdatacCs|t||fdSr)rr)r6targetrwr"r"r#rXrzXMLParser._handle_picCrrrrrr"r"r#r\rzXMLParser._handle_commentc Cs|dr@zttj|dd}|t|WdSty?|\}}}t d|||f}t j j |_ ||_||_|wdS)N&rjr&z)undefined entity "%s": line %d, column %d) startswithrpunichrentitiesname2codepointrrKeyErrorrrerrorerrorsXML_ERROR_UNDEFINED_ENTITYcoder4r5)r6r)r3r4r5rr"r"r#r^s   zXMLParser._handle_otherNN)r8r9r:r;rrr( _entitydefsjoinrrrr2rrrcrrrrHrJrLrZrNrPrRrTrVrXr\r^r"r"r"r#rNs2 +   rcCstttt|S)a?Parse the given XML source and return a markup stream. Unlike with `XMLParser`, the returned stream is reusable, meaning it can be iterated over multiple times: >>> xml = XML('FooBar') >>> print(xml) FooBar >>> print(xml.select('elem')) FooBar >>> print(xml.select('elem/text()')) FooBar :param text: the XML source :return: the parsed XML event stream :raises ParseError: if the XML text is not well-formed )rlistrr)r)r"r"r#rsrc@seZdZdZegdZdddZddZdd Zdd d Z d d Z ddZ ddZ ddZ ddZddZddZddZdS)raUParser for HTML input based on the Python `HTMLParser` module. This class provides the same interface for generating stream events as `XMLParser`, and attempts to automatically balance tags. The parsing is initiated by iterating over the parser object: >>> parser = HTMLParser(BytesIO(u'
  • Foo
'.encode('utf-8')), encoding='utf-8') >>> for kind, data, pos in parser: ... print('%s %s' % (kind, data)) START (QName('ul'), Attrs([(QName('compact'), 'compact')])) START (QName('li'), Attrs()) TEXT Foo END li END ul ) arearbasefontbrcolframehrimginputisindexlinkmetaparamNcCs.tj|||_||_||_g|_g|_dS)aInitialize the parser for the given HTML input. :param source: the HTML text as a file-like object :param filename: the name of the file, if known :param filename: encoding of the file; ignored if the input is unicode N)htmlrr2rCr3rfre _open_tags)r6rCr3rfr"r"r#r2-s  zHTMLParser.__init__crh)zGenerator that parses the HTML source, yielding markup events. :return: a markup event stream :raises ParseError: if the HTML text is not well formed c 3s*jrtj}|j}nj}z`d}d} |sHtjdkrH||}|s0d}nt|t j s:t d ||sHtjdks"jD] \}}}|||fVqKg_|rsj }||D] }tt||fVqeWdSqtjy} zd| j| j| jf} t| j| j| jd} ~ ww)NriFrjrTz0source returned bytes, but no encoding specifiedz%s: line %d, column %d)rfcodecs getreaderrCrlrermcloserorprq UnicodeErrorfeedrreverser rrHTMLParseErrorr0r4r5rr3) readerrCrurvrwrr open_tagsr'ryr0rzr"r#r{AsB      z#HTMLParser.parse.._generater|rr"rzr#r;s zHTMLParser.parsecCrrrrzr"r"r#rbrzHTMLParser.__iter__cCs&|dur|}|j|||fdSr)rrer)r6rrwrr"r"r#reszHTMLParser._enqueuecCs|\}}|j||fSr)getposr3)r6r4columnr"r"r#rjs  zHTMLParser._getposcCszg}|D]\}}|dur|}|t|t|fq|tt|t|f||jvr5|tt|dS|j|dSr) rrr rr r _EMPTY_ELEMSr r)r6r'r fixed_attribr=r!r"r"r#handle_starttagns  zHTMLParser.handle_starttagcCsP||jvr$|jr&|j}|tt|||krdS|jsdSdSdSr)rrpoprr rlower)r6r'open_tagr"r"r# handle_endtag{s   zHTMLParser.handle_endtagcCrrrrr"r"r# handle_datarzHTMLParser.handle_datacCsF|drtt|ddd}ntt|}|t|dS)Nxrj)rrrprintrrr6r=r)r"r"r#handle_charrefszHTMLParser.handle_charrefcCs@z ttj|}Wn tyd|}Ynw|t|dS)Nz&%s;)rprrrrrrrr"r"r#handle_entityrefs   zHTMLParser.handle_entityrefcCsb|dr |dd}z |dd\}}Wn ty"|}d}Ynw|t||fdS)N?r&rjrk)endswithsplit ValueErrorrrstrip)r6rwrr"r"r# handle_pis   zHTMLParser.handle_picCrrrrr"r"r#handle_commentrzHTMLParser.handle_commentrr)r8r9r:r; frozensetrr2rrrrrrrrrrrr"r"r"r#rs  '   rcCs<t|tjrtttt|ddStttt||dS)aMParse the given HTML source and return a markup stream. Unlike with `HTMLParser`, the returned stream is reusable, meaning it can be iterated over multiple times: >>> html = HTML('

Foo

', encoding='utf-8') >>> print(html)

Foo

>>> print(html.select('h1'))

Foo

>>> print(html.select('h1/text()')) Foo :param text: the HTML source :return: the parsed XML event stream :raises ParseError: if the HTML text is not well-formed, and error recovery fails N)rf)rorprqrrrrr)r)rfr"r"r#rs rccszg}d}t|dgD]/\}}}|tur |||dur|}q |r2td||fV|dd=d}|r:|||fVq dS)z3Coalesces adjacent TEXT events into a single event.N)NNNrk)rrrr)streamtextbuftextposrrwrr"r"r#r~s"   r~r)*r; itertoolsrr xml.parsersrrp six.movesrrrr genshi.corerrrr r r r r rrrrrrr genshi.compatrr__all__ __docformat__rr1robjectrrrrr~r"r"r"r#s(  45