;; Leninist.Biz!
;; Emacs-Time-stamp: "2006-12-14 23:41:30"
(setq file-stamp "Emacs-File-stamp: \"/home/ysverdlov/leninist.biz/lb-sgml.el\"")
;;
;;
;;
;; See also: "~/Lia/archive/lenin/howto/lia-sgml.el"
;; fix!
;; Need a function to convertion single-byte 8-bit ASCII to named entity.
;; (_-sgml-cdata-to-alphanumerics "Ab;cdé.") => "Abcde"
;; (_-sgml-cdata-to-alphanumerics ".internationalism—these")
(defun _-sgml-cdata-to-alphanumerics (arg1str &optional arg2flag-punct)
"Change cdata string ARG1 to simplest possible representation using
0-9, a-z, and A-Z to get something easy to type after '#' at end of URL.
If optional ARG2 is non-nil, keep punctuation"
(let (lb-str lb-assoc-value
(lb-rc arg1str))
;; Delete markup if necessary.
(while (string-match "<[^>]+>" lb-rc)
(setq lb-rc (replace-match "" nil nil lb-rc 0)))
;; 2006.12.07
;; Delete ✏.
(while (string-match "\\([&][#][0-9][0-9][0-9][0-9];\\)" lb-rc)
(setq lb-rc (replace-match "" nil nil lb-rc 1)))
;; Change ϧ numeric entities to named entities.
(setq lb-rc (_-sgml-entity-numeric-to-named lb-rc))
;; Delete certain named entities.
(while (string-match (concat "[&]\\("
;; fix!
;; Set as variable.
;; Function to turn list into parens.
"quot" "\\|"
"l[sd]quo" "\\|"
"r[sd]quo" "\\|"
"ls?aquo" "\\|"
"rs?aquo"
"\\)[;]") lb-rc)
(setq lb-rc (replace-match "" nil nil lb-rc 0)))
;; Change named entities to single alphas.
(while (string-match "[&]\\([a-zA-Z]\\)[a-zA-Z]+[;]" lb-rc)
(setq lb-rc (replace-match
(match-string-no-properties 1 lb-rc) nil nil lb-rc 0)))
(if (not arg2flag-punct)
;; Then, delete punctuation.
(while (and (string-match _-punct-from-keyboard lb-rc)
t)
(setq lb-rc (replace-match "" nil nil lb-rc 0))))
;;
lb-rc))
;; (_-sgml-entity-numeric-to-named)
;; (nth 1 (assoc "192" _-sgml-lat1)) => "Agrave"
;; (_-sgml-entity-numeric-to-named "Bürger!")
(defun _-sgml-entity-numeric-to-named (&optional arg1str)
""
(let (_-str _-assoc-value
_-rc)
(if (not (boundp '_-sgml-lat1)) ;; (makunbound '_-sgml-lat1)
(with-temp-buffer
(setq _-sgml-lat1 nil)
(loop for file in _-usr_share_sgml_html_dtd_xml_1.0_*.ent
do
(_-ifcl file))
(goto-char (point-min))
(while (search-forward-regexp
(concat "")
nil t)
(_-app '_-sgml-lat1
(list (list (match-string-no-properties 2)
(match-string-no-properties 1)))))))
(if (setq _-rc arg1str)
(while (and (string-match "[&][#]\\([0-9][0-9][0-9]+\\);" _-rc)
(setq _-str (match-string-no-properties 1 _-rc)))
(if (setq _-assoc-value (assoc _-str _-sgml-lat1))
(setq _-rc (replace-match
(concat
"&" (nth 1 _-assoc-value) ";") nil nil _-rc 0))
(error "%s: %s" (concat _-str " not found")
(prin1-to-string _-sgml-lat1)))))
_-rc))
(_-sgml-entity-numeric-to-named) ;; Initialize _-sgml-lat1
;; See: (apropos "_-sgml.*-p")
(defun _-sgml-comment-p nil
"Is point to right of '' ?
SEE ALSO _-sgml-markup-p"
;;
(let (
_-flag)
(save-match-data
;; fix!
;; BY DEFINITION:
;; treat CDATA inside a SPAN with attribute "sic" as a comment!
;;
;; ''estimable''
(save-excursion
;; comment beginning. 2006.09.09.
(if (looking-at "[!]?\\([-]+\\)[^>]") (goto-char (match-end 1)))
;; comment ending. 2006.09.09.
(while (looking-at "[-]?[>]") (forward-char -1))
(when (search-backward-regexp "[-][-]" nil t)
(forward-char -2)
(if (and (looking-at "" nil t))
(setq _-flag t)))))
_-flag))
(defun _-sgml-scrub nil
"Scrub SGML (some <> markup in non-authorish fields (en/1969/LCIA337)"
(save-match-data
(let (_-rc)
(save-excursion
(goto-char (point-min))
(while (search-forward-regexp "<<" nil t)
(replace-match ""))
(goto-char (point-min))
(while (search-forward-regexp ">>" nil t)
(replace-match ""))))))
;; See: (apropos "_-sgml.*-p")
;; (_-sgml-cdata-p)
(defun _-sgml-cdata-p nil
">...(point)...< returns tagname.
<...(point)...> returns nil"
(let (_-left _-right _-tagname _-str
_-rc)
(save-match-data
(setq _-left (save-excursion
(search-backward-regexp "[><]")
(setq _-str (match-string-no-properties 0))
(when (string= ">" _-str)
(search-backward "<")
(if (looking-at "<\\([a-zA-Z]+\\)")
(setq _-tagname (match-string-no-properties 1))))
_-str))
(setq _-right (save-excursion
(search-forward-regexp "[><]")
(match-string-no-properties 0)))
(if (and (not (_-sgml-comment-p))
(string= _-left _-right))
(error "%s: %s" "Expecting pair"
(concat _-left " ... " _-right
"\n"
(_-buffer-substring-from-point)
)))
)
(setq _-rc
(if (string= ">" _-left) _-tagname))
_-rc))
(provide 'lb-sgml)
;;;
;