;ELC ;;; Compiled ;;; in Emacs version 27.1 ;;; with all optimizations. ;;; This file contains utf-8 non-ASCII characters, ;;; and so cannot be loaded into Emacs 22 or earlier. (and (boundp 'emacs-version) (< (aref emacs-version (1- (length emacs-version))) ?A) (string-lessp emacs-version "23") (error "`%s' was compiled for Emacs 23 or later" #$)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defvar html2text-format-single-element-list '(("hr" . html2text-clean-hr))) #@344 The map of entity to text. This is an alist were each element is a dotted pair consisting of an old string, and a replacement string. This replacement is done by the function `html2text-substitute' which basically performs a `replace-string' operation for every element in the list. This is completely verbatim - without any use of REGEXP. (defvar html2text-replace-list '(("´" . "`") ("&" . "&") ("'" . "'") ("¦" . "|") ("¢" . "c") ("ˆ" . "^") ("©" . "(C)") ("¤" . "(#)") ("°" . "degree") ("÷" . "/") ("€" . "e") ("½" . "1/2") (">" . ">") ("¿" . "?") ("«" . "<<") ("&ldquo" . "\"") ("‹" . "(") ("‘" . "`") ("<" . "<") ("—" . "--") (" " . " ") ("–" . "-") ("‰" . "%%") ("±" . "+-") ("£" . "£") (""" . "\"") ("»" . ">>") ("&rdquo" . "\"") ("®" . "(R)") ("›" . ")") ("’" . "'") ("§" . "§") ("¹" . "^1") ("²" . "^2") ("³" . "^3") ("˜" . "~")) (#$ . 486)) #@577 A list of removable tags. This is a list of tags which should be removed, without any formatting. Note that tags in the list are presented *without* any "<" or ">". All occurrences of a tag appearing in this list are removed, irrespective of whether it is a closing or opening tag, or if the tag has additional attributes. The deletion is done by the function `html2text-remove-tags'. For instance the text: "Here comes something big ." will be reduced to: "Here comes something big." If this list contains the element "font". (defvar html2text-remove-tag-list '("html" "body" "p" "img" "dir" "head" "div" "br" "font" "title" "meta") (#$ . 1540)) #@472 An alist of tags and processing functions. This is an alist where each dotted pair consists of a tag, and then the name of a function to be called when this tag is found. The function is called with the arguments p1, p2, p3 and p4. These are demonstrated below: " This is bold text " ^ ^ ^ ^ | | | | p1 p2 p3 p4 Then the called function will typically format the text somewhat and remove the tags. (defvar html2text-format-tag-list '(("b" . html2text-clean-bold) ("strong" . html2text-clean-bold) ("u" . html2text-clean-underline) ("i" . html2text-clean-italic) ("em" . html2text-clean-italic) ("blockquote" . html2text-clean-blockquote) ("a" . html2text-clean-anchor) ("ul" . html2text-clean-ul) ("ol" . html2text-clean-ol) ("dl" . html2text-clean-dl) ("center" . html2text-clean-center)) (#$ . 2243)) #@201 Another list of removable tags. This is a list of tags which are removed similarly to the list `html2text-remove-tag-list' - but these tags are retained for the formatting, and then moved afterward. (defvar html2text-remove-tag-list2 '("li" "dt" "dd" "meta") (#$ . 3126)) #@63 Replace FROM-STRING with TO-STRING in region from MIN to MAX. (defalias 'html2text-replace-string #[(from-string to-string min max) "b\210\306 !\306\n!Z\307\310\n \311#\203 \312 !\210 \f\\\202 *\207" [min to-string from-string change delta max string-width 0 search-forward t replace-match] 4 (#$ . 3405)]) #@35 Get value of ATTRIBUTE from LIST. (defalias 'html2text-attr-value #[(list attribute) "\302 \"A@\207" [attribute list assoc] 3 (#$ . 3725)]) (defalias 'html2text-get-attr #[(p1 p2) "b\210\303\304 \305#\210\306\303\307 \305#\203>\310\311!\312\313!\2057\311\225b\210\314\306w\210\312\315!\2040\312\316!\2057\311\225b\210\310\311!D\nB\202 \n)\207" [p1 p2 attr-list re-search-forward "\\s-+" t nil "[-a-z0-9._]+" match-string 0 looking-at "\\s-*=" "[:space:]" "\"[^\"]*\"\\|'[^']*'" "[-a-z0-9._:]+"] 4]) (defalias 'html2text-clean-list-items #[(p1 p2 list-type) "b\210\305\211\306\307 \310#\203 T\202b\210\n W\205H\nT\306\307d\310#\210\f\311\230\2033\312c\210\202\f\313\230\203B\314\315\n\"c\210\202\316c\210\202*\207" [p1 items item-nr p2 list-type 0 search-forward "
  • " t "ul" " o " "ol" format " %s: " " x "] 4]) (defalias 'html2text-clean-dtdd #[(p1 p2) "b\210\306\211\307\310 \311#\203\nT\202b\210 \nW\205\202 T\312\313d\311#\210\314\315!\2037``\316\314\315!!Z|\210`\306\312\317d\311#\210\314\315!\203o\316\314\315!!\316\314\320!!\\`Zb\210``\\|\210`+\202w`\316\314\320!!Z\321 \f\322\323$\210*\202*\207" [p1 item-nr items p2 def-p2 def-p1 0 search-forward "
    " t re-search-forward "
    \\([ ]*\\)" match-string 1 string-width "\\([ ]*\\)\\(
    \\|
    \\)" 2 put-text-property face bold mw1 mw2 mw] 5]) (defalias 'html2text-delete-tags #[(p1 p2 p3 p4) " |\210\n ZZ ZZ|\207" [p1 p2 p3 p4] 4]) (defalias 'html2text-delete-single-tag #[(p1 p2) " |\207" [p1 p2] 2]) (defalias 'html2text-clean-hr #[(p1 p2) "\303 \"\210b\210\304\305!\210\306\n\307\"c\207" [p1 p2 fill-column html2text-delete-single-tag newline 1 make-string 45] 3]) (defalias 'html2text-clean-ul #[(p1 p2 p3 p4) "\304 \n $\210\305\n ZZ\306#\207" [p1 p2 p3 p4 html2text-delete-tags html2text-clean-list-items "ul"] 5]) (defalias 'html2text-clean-ol #[(p1 p2 p3 p4) "\304 \n $\210\305\n ZZ\306#\207" [p1 p2 p3 p4 html2text-delete-tags html2text-clean-list-items "ol"] 5]) (defalias 'html2text-clean-dl #[(p1 p2 p3 p4) "\304 \n $\210\305\n ZZ\"\207" [p1 p2 p3 p4 html2text-delete-tags html2text-clean-dtdd] 5]) (defalias 'html2text-clean-center #[(p1 p2 p3 p4) "\304 \n $\210\305\n ZZ\"\207" [p1 p2 p3 p4 html2text-delete-tags center-region] 5]) (defalias 'html2text-clean-bold #[(p1 p2 p3 p4) "\304 \305\306$\210\307\n $\207" [p2 p3 p1 p4 put-text-property face bold html2text-delete-tags] 5]) (defalias 'html2text-clean-title #[(p1 p2 p3 p4) "\304 \305\306$\210\307\n $\207" [p2 p3 p1 p4 put-text-property face bold html2text-delete-tags] 5]) (defalias 'html2text-clean-underline #[(p1 p2 p3 p4) "\304 \305\306$\210\307\n $\207" [p2 p3 p1 p4 put-text-property face underline html2text-delete-tags] 5]) (defalias 'html2text-clean-italic #[(p1 p2 p3 p4) "\304 \305\306$\210\307\n $\207" [p2 p3 p1 p4 put-text-property face italic html2text-delete-tags] 5]) (defalias 'html2text-clean-font #[(p1 p2 p3 p4) "\304 \n $\207" [p1 p2 p3 p4 html2text-delete-tags] 5]) (defalias 'html2text-clean-blockquote #[(p1 p2 p3 p4) "\304 \n $\207" [p1 p2 p3 p4 html2text-delete-tags] 5]) (defalias 'html2text-clean-anchor #[(p1 p2 _p3 p4) "\305 \"\306\n\307\"\f|\210 \205-b\210\310\311 \"\203$\312 \313\314#\202% c\210\315`\316\317$*\207" [p1 p2 attr-list href p4 html2text-get-attr html2text-attr-value "href" string-match "\\`['\"].*['\"]\\'" substring 1 -1 put-text-property face bold] 5]) (defalias 'html2text-fix-paragraph #[(p1 p2) "b\210\304\211\305\306 \307#\203<b\210\305\310 \307#\203<\311 \210` b\210\312\310\n\307#\210\313y\210\304\210`\314\315\316\n`$\\\317\n \"\210*\314\315\316 $\207" [p1 refill-stop refill-start p2 nil re-search-forward "
    $" t ".+[^<][^b][^r][^>]$" beginning-of-line re-search-backward 1 html2text-replace-string "
    " "" fill-region] 6]) #@149 This _tries_ to fix up the paragraphs - this is done in quite a ad-hook fashion, quite close to pure guess-work. It does work in some cases though. (defalias 'html2text-fix-paragraphs #[nil "eb\210\301\302\303\304#\203\305\224\305\225|\210\202eb\210m?\2057`\306\307!\210\310`S\"\210b\210m\2043\306\307!\210)\202\207" [p1 re-search-forward "^
    $" nil t 0 forward-paragraph 1 html2text-fix-paragraph] 4 (#$ . 7568) nil]) #@107 Removes the tags listed in the list `html2text-remove-tag-list'. See the documentation for that variable. (defalias 'html2text-remove-tags #[(tag-list) "\303\211\205*\n@eb\210\304\305\306 \"d\307#\203\"\310\224\310\225|\210\202\nA\211\204\303*\207" [tag-list tag --dolist-tail-- nil re-search-forward format "\\(]*>\\)" t 0] 5 (#$ . 8007) nil]) #@65 See the variable `html2text-format-tag-list' for documentation. (defalias 'html2text-format-tags #[nil "\306\211\205n\n@\211@ Aeb\210\307\310\311\f\"d\312#\203e\306`\306\211\313\314e\312#\210`\315\310\316\f\"d\312#\204Hb\210\310\316\f\"c\210`\313\317e\312#\210`  $\210b\210,\202*\nA\211\204\306*\207" [html2text-format-tag-list tag-and-function --dolist-tail-- function tag p4 nil re-search-forward format "\\(<%s\\( [^>]*\\)?>\\)" t search-backward "<" search-forward "" "]*\\)?>\\)" t search-backward "<" p1] 5 nil nil]) #@51 Convert HTML to plain text in the current buffer. (defalias 'html2text #[nil "\212\304\305\306\n!\210\307 \210\306 !\210\310 \210\311 \210\312 +\207" [buffer-read-only case-fold-search html2text-remove-tag-list html2text-remove-tag-list2 t nil html2text-remove-tags html2text-format-tags html2text-substitute html2text-format-single-elements html2text-fix-paragraphs] 2 (#$ . 9591) nil]) (provide 'html2text)