;;; Emacs-Time-stamp: "2007-07-29 12:57:01" ;;; Emacs-File-stamp: "/home/ysverdlov/leninist.biz/lia-sgml.el" ;;; ############################################ ;; Copied here from tx2html.el (2005.10.14) (unless (featurep 'lia_qual) (load "lia_qual")) ;;; (load-file (concat "~/"my-dns-mia"/archive/lenin/howto/lia_qual.el"))) ;; (_-sgml-grab-volume-anchor-titles) (defun _-sgml-grab-volume-anchor-titles nil "With a volume[0-4][0-9].htm in current buffer, write CDATA from A anchors to tab-delimited file in temporary directory with name= attribute values in column 2 1. dired v45pp76; M-x _-tx-grab-volume-__-titles 1b. sort buffer and write /tmp/tx 2. find-file-read-only volume45.htm; M-x _-sgml-grab-volume-anchor-titles 2b. sort buffer and write /tmp/htm 3. login@debian:/tmp$ join -v1 -t' ' tx htm > v1 3b. login@debian:/tmp$ join -v2 -t' ' tx htm > v2 NOTE: QUOTED INSERT: Ctrl-q in emacs; Ctrl-v in Bash. SEE: _-tx-grab-volume-__-titles" (interactive) (let (_-bfn _-bfn-origingal _-bfn-vol _-pt0 _-attr-value (_-re-bfn "/\\(volume[0-4][0-9][.]htm\\)$") _-rc) (setq _-bfn-origingal (buffer-file-name)) (if (not (string-match _-re-bfn _-bfn-origingal)) (error "%s: %s" "Expecting buffer-file-name" _-re-bfn) (setq _-bfn-vol (match-string-no-properties 1)) (find-file (with-temp-file (setq _-bfn (concat temporary-file-directory (make-temp-name "t2h-"))) (insert-file-contents-literally _-bfn-origingal) (write-region (point-min) (point-max) _-bfn) _-bfn))) ;; Delete everything except A anchors. (progn (setq _-pt0 (goto-char (point-min))) (while (search-forward-regexp "") (if (looking-at "[ \t\n\r]+") (replace-match ""))) (search-forward "") (newline 1) (setq _-pt0 (point))) (delete-region _-pt0 (point-max))) ;; Replace blank lines. (progn (goto-char (point-min)) (while (search-forward-regexp "[\n][ \t\n\r]*[\n]" nil t) (replace-match ""))) ;; Replace newlines not after "". (progn (goto-char (point-min)) (while (search-forward-regexp (concat "\\(....\\)" "\\([\n][ \t\n\r]*\\)") nil t) (if (not (string= (upcase "") (upcase (match-string-no-properties 1)))) (replace-match " " t t nil 2)))) ;; Compress spaces. (progn (goto-char (point-min)) (while (search-forward-regexp "[ \t\r][ \t\r]+" nil t) (replace-match " "))) ;; Delete trailing spaces. (progn (goto-char (point-min)) (while (search-forward-regexp "[ \t\n\r]+" nil t) (replace-match ""))) ;; Change to tab-delimited. (progn (goto-char (point-min)) (while (search-forward-regexp "") (delete-region _-pt0 (point)) (search-forward "") (replace-match (concat " " _-attr-value)))) ;; Delete "99k" (file sizes) (progn (goto-char (point-min)) (flush-lines "^[0-9]+k[\t]$")) _-rc)) ;;; ############################################ ;; (setq x (lia-sgml-make-lftp-source-file 34)) (defun lia-sgml-make-lftp-source-file (arg1voln) "" (let (input-list path path-list path-remote lftp-script-path lftp-script _rc) (if (integerp arg1voln) (setq arg1voln (int-to-string arg1voln))) ;; NOTE: Must remove it first before this will work: ;; $ rm -fR /tmp/volume34/ ;; $ rm -fR /mnt/win/tmp/volume34/ (setq input-list (lia-sgml-copy-hrefs-volume arg1voln ;;t )) (setq lftp-script-path (car input-list)) (setq lftp-script-path (substring lftp-script-path 0 (1+ (string-match "/" lftp-script-path (string-match "volume" lftp-script-path))))) (setq lftp-script (concat lftp-script-path "lftp.bat")) (with-temp-buffer (loop for file in input-list do (setq path (substring file 0 (string-match "[^/]+$" file))) (setq path-remote (substring path (length lftp-script-path))) (when (not (member path-remote path-list)) (_-app 'path-list path-remote) (setq path-remote-full (concat "/leninist.biz/data/marx.org/" path-remote)) (insert "lcd " path "\n" "mkdir -p " path-remote-full "\n" "cd " path-remote-full "\n" "mput *" "\n")) ) (write-region (point-min) (point-max) lftp-script)) (setq _rc lftp-script) _rc)) ;; (setq x (lia-sgml-copy-hrefs-volume 34)) (defun lia-sgml-copy-hrefs-volume (arg1voln &optional arg2nocopy) "" (let (rc pf file file-target tfd folder) (if (integerp arg1voln) (setq arg1voln (int-to-string arg1voln))) (setq tfd (concat (if t "/mnt/win") temporary-file-directory "volume" arg1voln)) (if (not arg2nocopy) (make-directory tfd t)) (loop for pf in (lia-sgml-get-hrefs-volume arg1voln) do (setq file (substring pf (+ (string-match "[.]org/" pf) (length ".org/")))) (setq folder (concat tfd "/" (file-name-directory file))) (if (and (not arg2nocopy) (not (file-directory-p folder))) (make-directory folder t)) (setq file-target (concat tfd "/" file)) (_-app 'rc file-target) (when (and (not (file-exists-p file-target)) (not arg2nocopy)) (copy-file pf file-target))) rc)) ;; (setq x (lia-sgml-get-hrefs-volume 24)) ;; (setq x (lia-sgml-get-hrefs-volume 34)) (defun lia-sgml-get-hrefs-volume (arg1voln) "Return list of pathfiles for all HTML documents in volume ARG1. Does not assume index file, volume99.htm, has links to all HTML, therefore HTML documents are read from ``index.htm'' links in volume99.htm. CHECK: grep: 999 CHECK: volume99.htl ../1895/nov/00pba.htm 34 parent 20-22 letter ../1895/nov/15pba.htm 34 parent 23 letter ../1897/aug/16pba.htm 34 parent 24 letter ... " (let (pf pf-1 href href-base-0 href-base-1 pf-list volume-index rc) (if (integerp arg1voln) (setq arg1voln (int-to-string arg1voln))) (setq volume-index (concat "~/"my-dns-mia"/archive/lenin/works/cw/volume" arg1voln "." t2h-ext-html)) (with-temp-buffer (_-ifcl volume-index) (setq href-base-0 (file-name-directory volume-index)) (goto-char (point-min)) (while (search-forward-regexp (concat "href=." "\\(" "[.][.]/1[89]" "[^'\"]+" "[.]" t2h-ext-html "\\)") nil t) (setq pf (concat href-base-0 (setq href (match-string-no-properties 1)))) (_-app 'pf-list (concat pf "")) ;; Sometimes children of index.htm are NOT linked from volume99.htm! (when (string-match (concat "/index." t2h-ext-html "$") pf) (with-temp-buffer (_-ifcl pf) (setq href-base-1 (file-name-directory pf)) (goto-char (point-min)) (while (search-forward-regexp (concat "href=." "\\(" "[^.#]" "[^'\"]" "[.]" t2h-ext-html "\\)") nil t) (setq pf-1 (concat href-base-1 (match-string-no-properties 1))) (if (not (member pf-1 pf-list)) (_-app 'pf-list (concat pf-1 "")))))))) (loop for pf in pf-list do ;; Remove "/../" in path. (while (string-match "/[^/]+/[.][.]/" pf) (setq pf (replace-match "/" nil nil pf))) (_-app 'rc pf)) rc ;; pf-list )) (defun t2h-sgml-toc-numbered-letters nil "DO NOT USE! Run-once script to add anchors to lenin/works/cw/volume3[45].htm. Put point at beginning of name-of-month, then run this interactive function" (interactive "*") (let (t2h-letx mmm yyyy nnnn filename (vv (progn (string-match t2h-regexp-htm-volume99 (buffer-file-name)) (match-string 1 (buffer-file-name))))) ;; Get the month. ;; 1. P. B. AXELROD. Beginning of November (unless (member (setq mmm (downcase (buffer-substring (point) (+ 3 (point))))) '("jan" "feb" "mar" "apr" "may" "jun" "jul" "aug" "sep" "oct" "nov" "dec")) (error "Place point at beginning of month name in volume34/35.htm")) (save-excursion (search-backward "<") (unless (looking-at "<[eE][mM][^a-zA-Z]") (error "Point must be inside EM element!"))) ;; Get the year from above H9 element. (save-excursion (search-backward-regexp "<[hH]") (search-forward ">") (unless (looking-at "[0-9]") (error (concat "Expecting H9 header element at: " (number-to-string (point)) ": " (buffer-file-name) ""))) (if (not (search-forward-regexp "[ \t]*\\([0-9][0-9][0-9][0-9]\\)[ \t]*<" nil t)) (error "Expecting a year after a H9 element") (setq yyyy (match-string 1)))) ;; Insert A element. (save-excursion (search-backward-regexp "<[tT[dD]") (search-forward ">") (if (not (search-forward-regexp "[ \t\n]*\\([0-9]+\\)[.]" nil t)) (error "Expecting a 999. at beginning of TD") (setq nnnn (match-string 1))) (goto-char (match-beginning 1)) ;; Insert opening A tag. (setq filename (concat "l" (format "%04d" (string-to-number nnnn)) "v" vv)) (insert "") (search-forward-regexp "<[eE][mM]") (if (not (search-backward-regexp "[.][ \t\n]+" nil t)) (error "Expecting period at end of last name!")) (insert "")))) (defvar t2h-hist-name-initial-contents "1911-feb" "see t2h-sgml-toc-anchor-title") (defun t2h-sgml-toc-anchor-title (&optional arg2) ;; insert A manually before href inserted. "Toss an 'A' anchor around title at point, beginning with , where titles ends at or at end-of-paragraph (i.e., blank line)" (interactive) (let (t2h-letx t2h-class t2h-name t2h-href t2h-dir t2h-a-open-end t2h-end-of-para t2h-last-td-data) (search-backward-regexp "") (forward-char -1) (search-forward-regexp "[^> \t\n]") (forward-char -1) (occur (if arg2 (progn (string-match "[^-]+[-][^-]+[-][0-9][0-9]" arg2) (substring arg2 (match-beginning 0) (match-end 0))) (concat t2h-hist-name-initial-contents ".")) ) ; "1" in "10" (setq t2h-class (read-from-minibuffer "class= " "article" nil nil 't2h-hist-class)) (setq t2h-name (read-from-minibuffer (concat " \(" t2h-class "\) " "name= ") t2h-hist-name-initial-contents nil nil 't2h-hist-name)) (setq t2h-href (concat "../" (subst-char-in-string 45 47 t2h-name) ".htm")) (setq t2h-dir (substring t2h-href 0 (string-match "/[^/]+$" t2h-href))) (insert "") (setq t2h-a-open-end (point)) (save-excursion (search-forward-regexp "") (when (string-match "[-]index$" t2h-name) (make-directory (concat t2h-path-cw "/" t2h-dir)) (mail) (mail-to) (insert "webmaster@leninist.biz") (mail-subject) (insert "Please make this directory on server.") (mail-text) (insert "\n(make-directory \"~/archive/lenin/works/cw/" t2h-dir "\")") (delete-region (point) (point-max)) (mail-send-and-exit nil)) ;; (t2h-check-hyperlink-collisions t) ;; lia_qual.el (goto-char t2h-a-open-end) (setq t2h-hist-name-initial-contents (progn (setq t2h-letx (split-string t2h-name "-")) (concat (nth 0 t2h-letx) "-" (nth 1 t2h-letx) "-"))) (search-forward "") ;; ;; Move to next document and repeat. Pauses at read. (search-forward "") (search-forward-regexp "[a-zA-Z]") (recenter 2) (t2h-sgml-toc-anchor-title t2h-name))) ;;; Sun Sep 15 23:01:07 PDT 2002 (defun t2h-check-hyperlink-collisions (clobber) "Abort if an HREF value in buffer collides with an existing file" ;; Return base HREF. (interactive "p") (let ((my-href nil) (my-href-list ()) (my-directory nil) (my-point-min -1) (my-base-href nil) (my-base-href-default (concat "~/"my-dns-mia"/archive/lenin/works/cw")) ;; ;; Above default will work on server or on local hard drive if ;; _ON SERVER_ ~/www.marxists.org is a symlink to ;; /www/public_html ! ;; Where did ~/www.marxists.org come from? ;; _ON LOCAL HARD DRIVE_, ;; ``cd ; wget --no-parent --recursive http://www.marxists.org/...'' ;; puts everything under ~/www.marxists.org ! ;; This rsync syntax uses directory hierarchy created by wget: ;; ``rsync ... '' ) ;; ;; fix? (if (null clobber) (setq clobber nil)) (defun my-href-list-handler (my-href-candidate) "Add string to list otherwise abort if it already exists" (if (not (null my-href-candidate)) (if (member my-href-candidate my-href-list) (error "%s %s" "Duplicate HREF in buffer:" my-href-candidate) (message (concat "Adding " my-href-candidate "...")) (setq my-href-list (append (list my-href-candidate) my-href-list))))) (save-excursion (save-restriction ;; Set base directory. (goto-char (point-min)) (if (not (file-directory-p (if (search-forward-regexp "<[bB][aA][sS][eE]" nil t) (if (search-forward-regexp "[ \t\n]+[hH][rR][eE][fF]=\"\\([^\"]+\\)\"") (setq my-base-href (match-string-no-properties 1))) (setq my-base-href my-base-href-default)))) (error (concat "Not a directory: " my-base-href " !"))) ;; Ex.: ;; ...and remove trailing slashes... (while (string-equal "/" (substring my-base-href (- (length my-base-href) 1))) (setq my-base-href (substring my-base-href 0 (- (length my-base-href) 1)))) (message "%s %s" "Setting base directory..." my-base-href) ;; TO-DO: check for conflicting name= and href= values. ;; To skip HREF= in do _NOT_ do (beginning-of-buffer)! (search-forward-regexp "<[Bb][Oo][Dd][Yy]") (setq my-point-min (point)) (setq my-href-list ()) (while (search-forward-regexp (concat "\\([hH][rR][eE][fF]\\)=\"" "\\([^\"]+\\)" "\"") nil t) (my-href-list-handler (my-check-hyperlink (match-string-no-properties 2) my-base-href (match-string-no-properties 1) clobber))) (setq my-href-list ()) (while (search-forward-regexp (concat "\\([nN][aA][mM][eE]\\)=\"" "\\([^\"]+\\)" "\"") nil t) (my-href-list-handler (my-check-hyperlink (match-string-no-properties 2) my-base-href (match-string-no-properties 1) clobber))) (message "%s" "No collisions."))) my-base-href)) (provide 'lia-sgml) ;;; ;