;;; Emacs-Time-stamp: "2007-07-29 12:57:01" ;;; Emacs-File-stamp: "/home/ysverdlov/leninist.biz/lia-sgml.el" ;;; ############################################ ;; Copied here from tx2html.el (2005.10.14) (unless (featurep 'lia_qual) (load "lia_qual")) ;;; (load-file (concat "~/"my-dns-mia"/archive/lenin/howto/lia_qual.el"))) ;; (_-sgml-grab-volume-anchor-titles) (defun _-sgml-grab-volume-anchor-titles nil "With a volume[0-4][0-9].htm in current buffer, write CDATA from A anchors to tab-delimited file in temporary directory with name= attribute values in column 2 1. dired v45pp76; M-x _-tx-grab-volume-__-titles 1b. sort buffer and write /tmp/tx 2. find-file-read-only volume45.htm; M-x _-sgml-grab-volume-anchor-titles 2b. sort buffer and write /tmp/htm 3. login@debian:/tmp$ join -v1 -t' ' tx htm > v1 3b. login@debian:/tmp$ join -v2 -t' ' tx htm > v2 NOTE: QUOTED INSERT: Ctrl-q in emacs; Ctrl-v in Bash. SEE: _-tx-grab-volume-__-titles" (interactive) (let (_-bfn _-bfn-origingal _-bfn-vol _-pt0 _-attr-value (_-re-bfn "/\\(volume[0-4][0-9][.]htm\\)$") _-rc) (setq _-bfn-origingal (buffer-file-name)) (if (not (string-match _-re-bfn _-bfn-origingal)) (error "%s: %s" "Expecting buffer-file-name" _-re-bfn) (setq _-bfn-vol (match-string-no-properties 1)) (find-file (with-temp-file (setq _-bfn (concat temporary-file-directory (make-temp-name "t2h-"))) (insert-file-contents-literally _-bfn-origingal) (write-region (point-min) (point-max) _-bfn) _-bfn))) ;; Delete everything except A anchors. (progn (setq _-pt0 (goto-char (point-min))) (while (search-forward-regexp "") (if (looking-at "[ \t\n\r]+") (replace-match ""))) (search-forward "") (newline 1) (setq _-pt0 (point))) (delete-region _-pt0 (point-max))) ;; Replace blank lines. (progn (goto-char (point-min)) (while (search-forward-regexp "[\n][ \t\n\r]*[\n]" nil t) (replace-match ""))) ;; Replace newlines not after "". (progn (goto-char (point-min)) (while (search-forward-regexp (concat "\\(....\\)" "\\([\n][ \t\n\r]*\\)") nil t) (if (not (string= (upcase "") (upcase (match-string-no-properties 1)))) (replace-match " " t t nil 2)))) ;; Compress spaces. (progn (goto-char (point-min)) (while (search-forward-regexp "[ \t\r][ \t\r]+" nil t) (replace-match " "))) ;; Delete trailing spaces. (progn (goto-char (point-min)) (while (search-forward-regexp "[ \t\n\r]+" nil t) (replace-match ""))) ;; Change to tab-delimited. (progn (goto-char (point-min)) (while (search-forward-regexp "") (delete-region _-pt0 (point)) (search-forward "") (replace-match (concat " " _-attr-value)))) ;; Delete "99k" (file sizes) (progn (goto-char (point-min)) (flush-lines "^[0-9]+k[\t]$")) _-rc)) ;;; ############################################ ;; (setq x (lia-sgml-make-lftp-source-file 34)) (defun lia-sgml-make-lftp-source-file (arg1voln) "" (let (input-list path path-list path-remote lftp-script-path lftp-script _rc) (if (integerp arg1voln) (setq arg1voln (int-to-string arg1voln))) ;; NOTE: Must remove it first before this will work: ;; $ rm -fR /tmp/volume34/ ;; $ rm -fR /mnt/win/tmp/volume34/ (setq input-list (lia-sgml-copy-hrefs-volume arg1voln ;;t )) (setq lftp-script-path (car input-list)) (setq lftp-script-path (substring lftp-script-path 0 (1+ (string-match "/" lftp-script-path (string-match "volume" lftp-script-path))))) (setq lftp-script (concat lftp-script-path "lftp.bat")) (with-temp-buffer (loop for file in input-list do (setq path (substring file 0 (string-match "[^/]+$" file))) (setq path-remote (substring path (length lftp-script-path))) (when (not (member path-remote path-list)) (_-app 'path-list path-remote) (setq path-remote-full (concat "/leninist.biz/data/marx.org/" path-remote)) (insert "lcd " path "\n" "mkdir -p " path-remote-full "\n" "cd " path-remote-full "\n" "mput *" "\n")) ) (write-region (point-min) (point-max) lftp-script)) (setq _rc lftp-script) _rc)) ;; (setq x (lia-sgml-copy-hrefs-volume 34)) (defun lia-sgml-copy-hrefs-volume (arg1voln &optional arg2nocopy) "" (let (rc pf file file-target tfd folder) (if (integerp arg1voln) (setq arg1voln (int-to-string arg1voln))) (setq tfd (concat (if t "/mnt/win") temporary-file-directory "volume" arg1voln)) (if (not arg2nocopy) (make-directory tfd t)) (loop for pf in (lia-sgml-get-hrefs-volume arg1voln) do (setq file (substring pf (+ (string-match "[.]org/" pf) (length ".org/")))) (setq folder (concat tfd "/" (file-name-directory file))) (if (and (not arg2nocopy) (not (file-directory-p folder))) (make-directory folder t)) (setq file-target (concat tfd "/" file)) (_-app 'rc file-target) (when (and (not (file-exists-p file-target)) (not arg2nocopy)) (copy-file pf file-target))) rc)) ;; (setq x (lia-sgml-get-hrefs-volume 24)) ;; (setq x (lia-sgml-get-hrefs-volume 34)) (defun lia-sgml-get-hrefs-volume (arg1voln) "Return list of pathfiles for all HTML documents in volume ARG1. Does not assume index file, volume99.htm, has links to all HTML, therefore HTML documents are read from ``index.htm'' links in volume99.htm. CHECK: grep: 999 CHECK: volume99.htl ../1895/nov/00pba.htm 34 parent 20-22 letter ../1895/nov/15pba.htm 34 parent 23 letter ../1897/aug/16pba.htm 34 parent 24 letter ... " (let (pf pf-1 href href-base-0 href-base-1 pf-list volume-index rc) (if (integerp arg1voln) (setq arg1voln (int-to-string arg1voln))) (setq volume-index (concat "~/"my-dns-mia"/archive/lenin/works/cw/volume" arg1voln "." t2h-ext-html)) (with-temp-buffer (_-ifcl volume-index) (setq href-base-0 (file-name-directory volume-index)) (goto-char (point-min)) (while (search-forward-regexp (concat "href=." "\\(" "[.][.]/1[89]" "[^'\"]+" "[.]" t2h-ext-html "\\)") nil t) (setq pf (concat href-base-0 (setq href (match-string-no-properties 1)))) (_-app 'pf-list (concat pf "")) ;; Sometimes children of index.htm are NOT linked from volume99.htm! (when (string-match (concat "/index." t2h-ext-html "$") pf) (with-temp-buffer (_-ifcl pf) (setq href-base-1 (file-name-directory pf)) (goto-char (point-min)) (while (search-forward-regexp (concat "href=." "\\(" "[^.#]" "[^'\"]" "[.]" t2h-ext-html "\\)") nil t) (setq pf-1 (concat href-base-1 (match-string-no-properties 1))) (if (not (member pf-1 pf-list)) (_-app 'pf-list (concat pf-1 "")))))))) (loop for pf in pf-list do ;; Remove "/../" in path. (while (string-match "/[^/]+/[.][.]/" pf) (setq pf (replace-match "/" nil nil pf))) (_-app 'rc pf)) rc ;; pf-list )) (defun t2h-sgml-toc-numbered-letters nil "DO NOT USE! Run-once script to add anchors to lenin/works/cw/volume3[45].htm. Put point at beginning of name-of-month, then run this interactive function" (interactive "*") (let (t2h-letx mmm yyyy nnnn filename (vv (progn (string-match t2h-regexp-htm-volume99 (buffer-file-name)) (match-string 1 (buffer-file-name))))) ;; Get the month. ;; 1. P. B. AXELROD. Beginning of November (unless (member (setq mmm (downcase (buffer-substring (point) (+ 3 (point))))) '("jan" "feb" "mar" "apr" "may" "jun" "jul" "aug" "sep" "oct" "nov" "dec")) (error "Place point at beginning of month name in volume34/35.htm")) (save-excursion (search-backward "<") (unless (looking-at "<[eE][mM][^a-zA-Z]") (error "Point must be inside EM element!"))) ;; Get the year from above H9 element. (save-excursion (search-backward-regexp "<[hH]") (search-forward ">") (unless (looking-at "[0-9]") (error (concat "Expecting H9 header element at: " (number-to-string (point)) ": " (buffer-file-name) ""))) (if (not (search-forward-regexp "[ \t]*\\([0-9][0-9][0-9][0-9]\\)[ \t]*<" nil t)) (error "Expecting a year after a H9 element") (setq yyyy (match-string 1)))) ;; Insert A element. (save-excursion (search-backward-regexp "<[tT[dD]") (search-forward ">") (if (not (search-forward-regexp "[ \t\n]*\\([0-9]+\\)[.]" nil t)) (error "Expecting a 999. at beginning of TD") (setq nnnn (match-string 1))) (goto-char (match-beginning 1)) ;; Insert opening A tag. (setq filename (concat "l" (format "%04d" (string-to-number nnnn)) "v" vv)) (insert "") (search-forward-regexp "<[eE][mM]") (if (not (search-backward-regexp "[.][ \t\n]+" nil t)) (error "Expecting period at end of last name!")) (insert "")))) (defvar t2h-hist-name-initial-contents "1911-feb" "see t2h-sgml-toc-anchor-title") (defun t2h-sgml-toc-anchor-title (&optional arg2) ;; insert A manually before href inserted. "Toss an 'A' anchor around title at point, beginning with