;; Leninist.Biz! ;; Emacs-Time-stamp: "2006-12-14 23:41:30" (setq file-stamp "Emacs-File-stamp: \"/home/ysverdlov/leninist.biz/lb-sgml.el\"") ;; ;; ;; ;; See also: "~/Lia/archive/lenin/howto/lia-sgml.el" ;; fix! ;; Need a function to convertion single-byte 8-bit ASCII to named entity. ;; (_-sgml-cdata-to-alphanumerics "Ab;cdé.") => "Abcde" ;; (_-sgml-cdata-to-alphanumerics ".internationalism—these") (defun _-sgml-cdata-to-alphanumerics (arg1str &optional arg2flag-punct) "Change cdata string ARG1 to simplest possible representation using 0-9, a-z, and A-Z to get something easy to type after '#' at end of URL. If optional ARG2 is non-nil, keep punctuation" (let (lb-str lb-assoc-value (lb-rc arg1str)) ;; Delete markup if necessary. (while (string-match "<[^>]+>" lb-rc) (setq lb-rc (replace-match "" nil nil lb-rc 0))) ;; 2006.12.07 ;; Delete ✏. (while (string-match "\\([&][#][0-9][0-9][0-9][0-9];\\)" lb-rc) (setq lb-rc (replace-match "" nil nil lb-rc 1))) ;; Change ϧ numeric entities to named entities. (setq lb-rc (_-sgml-entity-numeric-to-named lb-rc)) ;; Delete certain named entities. (while (string-match (concat "[&]\\(" ;; fix! ;; Set as variable. ;; Function to turn list into parens. "quot" "\\|" "l[sd]quo" "\\|" "r[sd]quo" "\\|" "ls?aquo" "\\|" "rs?aquo" "\\)[;]") lb-rc) (setq lb-rc (replace-match "" nil nil lb-rc 0))) ;; Change named entities to single alphas. (while (string-match "[&]\\([a-zA-Z]\\)[a-zA-Z]+[;]" lb-rc) (setq lb-rc (replace-match (match-string-no-properties 1 lb-rc) nil nil lb-rc 0))) (if (not arg2flag-punct) ;; Then, delete punctuation. (while (and (string-match _-punct-from-keyboard lb-rc) t) (setq lb-rc (replace-match "" nil nil lb-rc 0)))) ;; lb-rc)) ;; (_-sgml-entity-numeric-to-named) ;; (nth 1 (assoc "192" _-sgml-lat1)) => "Agrave" ;; (_-sgml-entity-numeric-to-named "Bürger!") (defun _-sgml-entity-numeric-to-named (&optional arg1str) "" (let (_-str _-assoc-value _-rc) (if (not (boundp '_-sgml-lat1)) ;; (makunbound '_-sgml-lat1) (with-temp-buffer (setq _-sgml-lat1 nil) (loop for file in _-usr_share_sgml_html_dtd_xml_1.0_*.ent do (_-ifcl file)) (goto-char (point-min)) (while (search-forward-regexp (concat "") nil t) (_-app '_-sgml-lat1 (list (list (match-string-no-properties 2) (match-string-no-properties 1))))))) (if (setq _-rc arg1str) (while (and (string-match "[&][#]\\([0-9][0-9][0-9]+\\);" _-rc) (setq _-str (match-string-no-properties 1 _-rc))) (if (setq _-assoc-value (assoc _-str _-sgml-lat1)) (setq _-rc (replace-match (concat "&" (nth 1 _-assoc-value) ";") nil nil _-rc 0)) (error "%s: %s" (concat _-str " not found") (prin1-to-string _-sgml-lat1))))) _-rc)) (_-sgml-entity-numeric-to-named) ;; Initialize _-sgml-lat1 ;; See: (apropos "_-sgml.*-p") (defun _-sgml-comment-p nil "Is point to right of '' ? SEE ALSO _-sgml-markup-p" ;; (let ( _-flag) (save-match-data ;; fix! ;; BY DEFINITION: ;; treat CDATA inside a SPAN with attribute "sic" as a comment! ;; ;; ''estimable'' (save-excursion ;; comment beginning. 2006.09.09. (if (looking-at "[!]?\\([-]+\\)[^>]") (goto-char (match-end 1))) ;; comment ending. 2006.09.09. (while (looking-at "[-]?[>]") (forward-char -1)) (when (search-backward-regexp "[-][-]" nil t) (forward-char -2) (if (and (looking-at "" nil t)) (setq _-flag t))))) _-flag)) (defun _-sgml-scrub nil "Scrub SGML (some <> markup in non-authorish fields (en/1969/LCIA337)" (save-match-data (let (_-rc) (save-excursion (goto-char (point-min)) (while (search-forward-regexp "<<" nil t) (replace-match "")) (goto-char (point-min)) (while (search-forward-regexp ">>" nil t) (replace-match "")))))) ;; See: (apropos "_-sgml.*-p") ;; (_-sgml-cdata-p) (defun _-sgml-cdata-p nil ">...(point)...< returns tagname. <...(point)...> returns nil" (let (_-left _-right _-tagname _-str _-rc) (save-match-data (setq _-left (save-excursion (search-backward-regexp "[><]") (setq _-str (match-string-no-properties 0)) (when (string= ">" _-str) (search-backward "<") (if (looking-at "<\\([a-zA-Z]+\\)") (setq _-tagname (match-string-no-properties 1)))) _-str)) (setq _-right (save-excursion (search-forward-regexp "[><]") (match-string-no-properties 0))) (if (and (not (_-sgml-comment-p)) (string= _-left _-right)) (error "%s: %s" "Expecting pair" (concat _-left " ... " _-right "\n" (_-buffer-substring-from-point) ))) ) (setq _-rc (if (string= ">" _-left) _-tagname)) _-rc)) (provide 'lb-sgml) ;;; ;