;;; ---------------------------------------------------------------- ;;; Modelled after 'viewtioga' by David Nichols. ;;; Bugs to Norman Adams (norman@parc.xerox.com) ;;; ;;; TIOGA-CONVERT-BUFFER walks a buffer containing a tioga document, ;;; and calls client supplied procedures at notable junctures in the ;;; tioga structure. The only entry point in TIOGA-WALK-BUFFER, ;;; a function of 5 arguments. ;;; ;;; Presumably the client implements a conversion form tioga format to ;;; some other format. "tioga-dump.el" is a sample client that prints ;;; the tioga structure in a readable way. "tioga-scheme.el" converts ;;; Tioga-formatted Scheme code to plain text preserving indentation. ;;; ;;; The client supplies 5 callback routines as arguments to ;;; TIOGA-WALK-BUFFER. For the sake of this description, the five ;;; callback routines are (in argument order): start-node, handle-prop, ;;; add-looks, insert-text, and end-node. ;;; ;;; The arguments passes to each of these are as follows: ;;; ;;; (defun start-node (format-name level) ...) ;;; format-name (string) - the name of the format for this node ;;; level (integer) - the nesting level of this node ;;; ;;; (defun handle-prop (prop-name value) ...) ;;; prop-name (string) - the name of a property for this node ;;; value (string) - the value of the property ;;; ;;; (defun add-looks (looks start len) ...) ;;; looks (4 character string) - actually a 32 bit number with ;;; the MSB in the high bit of (aref string 0). ;;; start, len (integers) - the run of this node's text to which to apply ;;; the looks. START is relative to the start of this node's text. ;;; ;;; (defun insert-text (text commentp level) ...) ;;; text (string) - the text for this node ;;; comment-p (boolean) - true is this is a comment node ;;; level (integer) - the next level of this node ;;; ;;; (defun end-node (level) ...) ;;; level (integer) - the nesting level of the node that is ending. ;;; ;;; An idealized node in a tioga document can be described as: ;;; node ::= start-spec property-spec* looks-spec* insert-spec end-spec ;;; start-spec ::= START-OP format-name ;;; property-spec ::= PROP-OP property-name property-value ;;; looks-spec ::= LOOKS-OP looks start len ;;; insert-spec ::= COMMENT-OP text | TEXT-OP text ;;; end-spec ::= END-OP ;;; ;;; A Tioga document is just a node. The root node may contain ;;; properties, but the text for the insert-spec is null. ;;; ;;; ;;; See tioga-dump.el and tioga-scheme.el for sample clients. ;;; ;;; A section at the end of this files defines a few utilites ;;; for converters to use. ;;; ;;; ---------------- from ~nichols/scr/viewtioga/src/tioga.h ; endOfFile = 0, ; startNode, ; /** length of formatName in next byte. Text for format name ; follows that. ; enter in formatName table ; and assign it the next number */ ; startNodeFirst, ; startNodeLast = startNodeFirst + tioga_NumFormats, ; /* these opcodes encode previously seen formatName so don't need ; to repeat it find formatName in op-startNodeFirst of format ; table other information follows same as for startNode */ ; terminalTextNode, ; terminalTextNodeFirst, ; terminalTextNodeLast = terminalTextNodeFirst + tioga_NumFormats, ; /* these opcodes are for nodes without children so can skip ; endNode opcode identical to startNodeFirst..startNodeLast, ; except implies no children find format name in ; op-startNodeFirst of format table other information follows ; same as for startNode */ ; otherNode, ; /** for "other" format of nodes length of formatName in next byte. ; text for format name follows that. ; enter in formatName table ; and assign it the next number */ ; otherNodeShort, ; /* like otherNode, but followed by a formatName code number ; instead of length+text */ ; otherNodeSpecs, ; /** Gives "variety" and specifications for immediately previous ; "other" format node. Length of variety name follows in next ; byte(s). Then text of name. ; Enter in property name table and assign next number. ; Length of specs text in next byte(s) specs follow that. */ ; otherNodeSpecsShort, ; /* like otherNodeSpecs, but followed by a propname code number ; instead of length+text */ ; prop, ; /* Specifies property for current node. Property specs are stored ; on file as a rope. Length of property name follows in next ; byte(s). then text of name. Enter in property name table and ; assign next number. Length of property specs rope in next ; byte(s). Followed by text for property specs rope. */ ; propShort, ; /* Like prop, but followed by a propname code number instead of ; length+text. */ ; endNode, ; /* End current node and go back to adding to its parent. */ ; rope, ; /* This op declares rope for most recently started node. Length of ; text for the node in next byte(s). Actual text comes from text ; block followed by a CR which is not included in the length. */ ; comment, ; /* Identical to rope except implies text stored in comment area of ; file. */ ; runs, ; /* This op preceeds definition of looks for most recently started ; node. Number of runs in following byte(s). Have at most 1 runs ; op per node. if omit, then no special looks. */ ; looks, ; /** Looks vector in following 4 bytes. ; Enter vector in looks table. ; and assign it the next number ; Length of run in next byte(s). */ ; looksFirst, ; looksLast = looksFirst + tioga_NumLooks, ; /* These ops encode previously encountered looks so don't need to ; repeat. Find looks in looks table[op-looksFirst]. Length of run ; in the next byte(s). */ ; look1, ; /* Like looks op, except has single look char instead of 4 byte ; looks vector. */ ; look2, ; /* Like looks1, except has two looks chars. */ ; look3, ; /* Like look2, except has three looks chars. */ ;;; ---------------------------------------------------------------- ;;; after ~nichols/scr/viewtioga/src/tioga.h ;;; ---------------- Ops table. Created by create-top-table, below (defconst top-end-of-file 0) ; 00 (defconst top-start-node 1) ; 01 (defconst top-start-node-first 2) ; 02 (defconst top-start-node-last 72) ; 0110 (defconst top-terminal-text-node 73) ; 0111 (defconst top-terminal-text-node-first 74) ; 0112 (defconst top-terminal-text-node-last 144) ; 0220 (defconst top-other-node 145) ; 0221 (defconst top-other-node-short 146) ; 0222 (defconst top-other-node-specs 147) ; 0223 (defconst top-other-node-specs-short 148) ; 0224 (defconst top-prop 149) ; 0225 (defconst top-prop-short 150) ; 0226 (defconst top-end-node 151) ; 0227 (defconst top-rope 152) ; 0230 (defconst top-comment 153) ; 0231 (defconst top-runs 154) ; 0232 (defconst top-looks 155) ; 0233 (defconst top-looks-first 156) ; 0234 (defconst top-looks-last 206) ; 0316 (defconst top-look1 207) ; 0317 (defconst top-look2 208) ; 0320 (defconst top-look3 209) ; 0321 (defconst tioga_LenLen 4) (defconst tioga_IDLen 2) (defconst tioga_CommentHeaderLen (+ tioga_IDLen tioga_LenLen)) (defconst tioga_ControlHeaderLen (+ tioga_IDLen tioga_LenLen)) (defconst tioga_TrailerLen (+ tioga_IDLen ( * 3 tioga_LenLen))) (defconst trailer-id "\205\227") (defconst comment-id "\000\000") (defconst control-id "\235\312") ;;; ---------------------------------------------------------------- ;;; after ~nichols/scr/viewtioga/src/tioga.c (defvar tioga-text-next) (defvar tioga-text-limit) (defvar tioga-comment-next) (defvar tioga-comment-limit) (defvar tioga-control-next) (defvar tioga-control-limit) (defvar tioga-sum-limits) (defvar tioga-sum-size) (defvar tioga-progress-factor) (defun tioga-walk-buffer (tioga-start-node tioga-handle-prop tioga-add-looks tioga-insert-text tioga-end-node) (let* ((buffer-len (buffer-size)) (t-base (+ (- buffer-len tioga_TrailerLen) 1)) (t-start (tread-check-id-buffer t-base trailer-id)) (prop-len (tread-get-length-buffer t-start)) (text-len (tread-get-length-buffer (+ t-start 4))) (total-len (tread-get-length-buffer (+ t-start 8)))) (if (or (not (= total-len buffer-len)) (> prop-len total-len) (> text-len total-len)) (error "basic tioga sizes are inconsistent")) (let* ((comment-start (tread-check-id-buffer (+ text-len 1) comment-id)) (comment-len (tread-get-length-buffer comment-start)) (control-start (tread-check-id-buffer (+ text-len comment-len 1) control-id)) (control-len (tread-get-length-buffer control-start))) (if (not (= buffer-len (+ text-len comment-len control-len))) (error "basic tioga part sizes are inconsistent")) (setq tioga-text-next 1) (setq tioga-text-limit (+ text-len 1)) (setq tioga-comment-next (+ comment-start tioga_LenLen)) (setq tioga-comment-limit (+ text-len comment-len 1)) (setq tioga-control-next (+ control-start tioga_LenLen)) (setq tioga-control-limit (+ (- buffer-len tioga_TrailerLen) 1)) ;; for progress report messages (setq tioga-sum-limits (+ tioga-text-limit tioga-control-limit tioga-comment-limit)) (setq tioga-sum-size (+ text-len comment-len control-len)) (cond ((> tioga-sum-size 80000) (setq tioga-sum-size (/ tioga-sum-size 100)) (setq tioga-progress-factor 1)) (t (setq tioga-progress-factor 100))) (tioga-walk-loop) ))) ;;; ---------------- (defvar tioga-looks-count) (defvar tioga-looks-table) (defvar tioga-format-count) (defvar tioga-format-table) (defvar tioga-prop-count) (defvar tioga-prop-table) (defun tioga-init-tables () (setq tioga-looks-count 1) (setq tioga-looks-table '(("\000\000\000\000" . 0))) (setq tioga-format-count 1) (setq tioga-format-table '(("" . 0))) (setq tioga-prop-count 1) (setq tioga-prop-table '(("" . 0))) (tread-prop-index "prefix") (tread-prop-index "postfix") ) (defun tioga-walk-loop () (let ((op (tread-get-op)) (terminal-node nil) (last-was-terminal nil) (format-index nil) (prop-index nil) (level -1) ; first start-node bumps to zero ... (run-len 0) (len 'len-unspec) (done nil)) (tioga-init-tables) (while (not done) (cond ((and (<= top-terminal-text-node-first op) (<= op top-terminal-text-node-last)) (setq terminal-node t) (setq format-index (- op top-terminal-text-node-first)) (work-loop-case--what-to-do-on-break)) ((and (<= top-start-node-first op) (<= op top-start-node-last)) (setq terminal-node nil) (setq format-index (- op top-start-node-first)) (work-loop-case--what-to-do-on-break)) ((= op top-end-node) ;; todo (tioga-do-end-node) (setq op (tread-get-op))) ((or (= op top-start-node) (= op top-terminal-text-node)) (let ((str (tread-get-str))) (setq format-index (tread-format-index str)) (setq terminal-node (= op top-terminal-text-node)) (work-loop-case--what-to-do-on-break))) ((or (= op top-rope) (= op top-comment)) ;; Get newline, just don't pass it to client. (let* ((length (tread-get-int)) (str (if (= op top-rope) (tread-span-text (+ length 1)) (tread-span-comment (+ length 1))))) ;; Convert newlines if should. ;; FixNewlines(r->str, length + 1) ; **** (tioga-progress-report) (funcall tioga-insert-text str (= op top-comment) level) (if (and (not (= run-len 0)) (not (= run-len length))) (tioga-error "Rope length(%d) doesn't match run length(%d)\n" length run-len)) (setq run-len 0) (setq op (tread-get-op)))) ((= op top-runs) (let ((nRuns (tread-get-int)) (i 0)) (setq run-len 0) (while (< i nRuns) (let ((look-index 'foo)) (setq op (tread-get-op)) (cond ((and (<= top-looks-first op) (<= op top-looks-last)) (setq look-index (- op top-looks-first))) ((and (<= top-look1 op) (<= op top-look3)) (setq look-index (tread-look-chars-to-index (+ (- op top-look1) 1)))) ((= op top-looks) (let ((looks-str (tread-get-looks-str))) (setq look-index (tread-looks-index looks-str)))) (t (tioga-error "no looks in run"))) (cond ((>= look-index tioga-looks-count) (tioga-error "Look index(%d) too large.\n" look-index) (setq look-index 0))) (let ((rl (tread-get-int))) (funcall tioga-add-looks (tread-get-looks look-index) run-len rl) (setq run-len (+ run-len rl)) (setq i (+ i 1))))) (setq op (tread-get-op)))) ((= op top-prop) (let ((str (tread-get-str))) (setq prop-index (tread-prop-index str)) (let* ((len (tread-get-int)) (str (tread-span-control len))) (funcall tioga-handle-prop (tread-get-prop prop-index) str) (setq op (tread-get-op))))) ((= op top-prop-short) (setq prop-index (tread-get-byte)) (let* ((len (tread-get-int)) (str (tread-span-control len))) (funcall tioga-handle-prop (tread-get-prop prop-index) str) (setq op (tread-get-op)))) ((= op top-end-of-file) (cond ((= level 0) (work-loop-case--what-to-do-on-break)) (t ;; Supply missing endNode ops. (setq op top-endNode)))) ;;((= op top-other-node) ...) ;;((= op top-other-node-short) ...) ;;((= op top-other-node-specs) ...) ;;((= op top-other-node-specs-short) ...) (t (tioga-error "Illegal op code: %d\n" op) (setq op (tread-get-op))) )))) (defun tioga-do-end-node () (cond ((>= level 0) (setq level (- level 1))) (t (tioga-error "Too many endNodes.\n"))) (funcall tioga-end-node (+ level 1))) ;; This relies on dynamic scoping. (defun work-loop-case--what-to-do-on-break () (cond ((and (= level 0) (= op top-end-of-file)) (setq done t)) (t ;; If we make it here, then we want to start a new text node. (if last-was-terminal (tioga-do-end-node)) (setq last-was-terminal terminal-node) (setq level (+ level 1)) (funcall tioga-start-node (tread-get-format format-index) level) (setq op (tread-get-op))))) (defun tioga-progress-report () (let ((remains (- tioga-sum-limits (+ tioga-text-next tioga-control-next tioga-comment-next)))) (message (concat "Tioga: " (- 100 (/ (* tioga-progress-factor remains) tioga-sum-size)) "%% converted.")) (sit-for 0))) ;;; ---------------------------------------------------------------- ;;; after ~nichols/scr/viewtioga/src/tread.c ;;; ---------------- hacking tioga file sections ;;; ---- length field (defun tread-get-length-buffer (pos) (tread-get-length (buffer-substring pos (+ pos 4)) 0)) (defun tread-get-length (str pos) (+ (string-to-char (substring str (+ pos 1) (+ pos 2))) (lsh (string-to-char (substring str pos (+ pos 1))) 8) (lsh (string-to-char (substring str (+ pos 2) (+ pos 3))) 24) (lsh (string-to-char (substring str (+ pos 3) (+ pos 4))) 16))) ;;; ---- check section id (defun tread-check-id-buffer (pos id) (let ((l (length id))) (tread-check-id (buffer-substring pos (+ pos l)) 0 id) (+ pos l))) (defun tread-check-id (str pos id) (if (not (string= (substring str pos (+ pos (length id))) id)) (error "tread-check-id failed"))) ;;; ---------------- Control stream fetchers ;;; ---- op byte (defun tread-get-op () (cond ((< tioga-control-next tioga-control-limit) (let ((c (char-after tioga-control-next))) (setq tioga-control-next (+ tioga-control-next 1)) c)) (t top-end-of-file))) ;;; ---- byte (defun tread-get-byte () (cond ((< tioga-control-next tioga-control-limit) (let ((pos tioga-control-next)) (setq tioga-control-next (+ pos 1)) (string-to-char (buffer-substring pos tioga-control-next)))) ;; (tioga-error "eof get control byte") (t 0))) ;;; ---- integer ;;; need bignums? (elisp has 24 bit fixnums) (defun tread-get-int () (let ((nbits 0) (result 0) (done nil)) (while (not done) (let ((byte (char-after tioga-control-next))) (setq tioga-control-next (+ tioga-control-next 1)) (setq done (< byte 128)) (let ((byte-low (logand byte 127))) (if (or (> nbits 21) (and (= nbits 21) (> byte-low 3))) (tioga-error "(NYI) more than 23 bits of length")) (setq result (logior result (lsh byte-low nbits))) (setq nbits (+ nbits 7))))) result)) ;;; ---- string (defun tread-get-str () (cond ((< tioga-control-next tioga-control-limit) (let ((len (char-after tioga-control-next))) (setq tioga-control-next (+ tioga-control-next 1)) (tread-span-control len))) (t (tioga-error "eof getting string length")))) ;;; ---------------- fetch bytes from one of the 3 sections ;;; ---- control (defun tread-span-control (len) (cond ((> (+ tioga-control-next len) tioga-control-limit) (tioga-error "eof getting rope from control")) (t (let ((pos tioga-control-next)) (setq tioga-control-next (+ tioga-control-next len)) (buffer-substring pos tioga-control-next))))) ;;; ---- comment (defun tread-span-comment (len) (cond ((> (+ tioga-comment-next len) tioga-comment-limit) (tioga-error "eof getting rope from comment")) (t (let ((pos tioga-comment-next)) (setq tioga-comment-next (+ tioga-comment-next len)) (buffer-substring pos tioga-comment-next))))) ;;; ---- text (defun tread-span-text (len) (cond ((> (+ tioga-text-next len) tioga-text-limit) (tioga-error "eof getting rope from text")) (t (let ((pos tioga-text-next)) (setq tioga-text-next (+ tioga-text-next len)) (buffer-substring pos tioga-text-next))))) ;;; ---------------- Looks ;;; Get 4 bytes of looks, MSB is high bit of the first character of string (defun tread-get-looks-str () (let ((pos tioga-control-next)) (setq tioga-control-next (+ pos 4)) (cond ((<= tioga-control-next tioga-control-limit) (buffer-substring pos tioga-control-next)) (t (tioga-error "eof getting looks string"))))) ;;; Convert looks characters to a looks vector ;;; "a" is MSB (defun tread-look-chars-to-index (n) (let ((result (make-string 4 0))) (while (> n 0) (let ((c (tread-get-byte))) (cond ((and (>= c ?a) (<= c (+ ?a 31))) (let* ((bitnum-in-word (- 31 (- c ?a))) (bitnum-in-byte (% bitnum-in-word 8)) (bytenum (- 3 (/ bitnum-in-word 8)))) (aset result bytenum (logior (aref result bytenum) (lsh 1 bitnum-in-byte))))) (t (tioga-error "Illegal look character: %d" c)))) (setq n (- n 1))) (tread-looks-index result))) ;;; Register a looks vector, return index (defun tread-looks-index (looks-str) (cond ((string= looks-str "\000\000\000\000") 0) (t (let ((probe (assoc looks-str tioga-looks-table))) (cond ((null probe) (setq tioga-looks-table (cons (cons looks-str tioga-looks-count) tioga-looks-table)) (prog1 tioga-looks-count (setq tioga-looks-count (+ tioga-looks-count 1)))) (t (cdr probe))))))) ;;; Get a looks vector given an index (defun tread-get-looks (index) (let ((probe (rassq index tioga-looks-table))) (cond (probe (car probe)) (t (tioga-error "invalid looks index (%d)" index))))) ;;; ---------------- Format ;;; Add a new format, and return its index. (defun tread-format-index (format-str) (cond ((string= format-str "") 0) (t (let ((probe (assoc format-str tioga-format-table))) (cond ((null probe) (setq tioga-format-table (cons (cons format-str tioga-format-count) tioga-format-table)) (prog1 tioga-format-count (setq tioga-format-count (+ tioga-format-count 1)))) (t (cdr probe))))))) ;;; Get a format given an index. (defun tread-get-format (index) (let ((probe (rassq index tioga-format-table))) (cond (probe (car probe)) (t (tioga-error "invalid format index (%d)" index))))) ;;; ---------------- Properties ;;; Add a new prop, and return its index. (defun tread-prop-index (prop-str) (cond ((string= prop-str "") 0) (t (let ((probe (assoc prop-str tioga-prop-table))) (cond ((null probe) (setq tioga-prop-table (cons (cons (downcase prop-str) tioga-prop-count) tioga-prop-table)) (prog1 tioga-prop-count (setq tioga-prop-count (+ tioga-prop-count 1)))) (t (cdr probe))))))) ;;; Get a property given an index. (defun tread-get-prop (index) (let ((probe (rassq index tioga-prop-table))) (cond (probe (car probe)) (t (tioga-error "invalid property index (%d)" index))))) ;;; ---------------- Errors (defun tioga-error (str &rest args) (apply (function error) (cons (concat "Error converting Tioga: " str) args))) ;;; ---------------- Development aids ;(defun create-top-table () ; (let ((ops '(end-of-file start-node start-node-first (start-node-last 70) ; terminal-text-node terminal-text-node-first ; (terminal-text-node-last 70) other-node ; other-node-short other-node-specs ; other-node-specs-short prop prop-short end-node rope ; comment runs looks looks-first (looks-last 50) look1 ; look2 look3)) ; (num -1)) ; (while ops ; (let ((op (car ops)) ; (incr 1)) ; (cond ((consp op) ; (setq incr (car (cdr op))) ; (setq op (car op)))) ; (setq op (intern (concat "top-" (symbol-name op)))) ; (set op (setq num (+ num incr))) ; (insert (format "\n(defconst %s %d) ; 0%o" ; op (symbol-value op) (symbol-value op))) ; (beginning-of-line) ; (indent-sexp) ; (end-of-line) ; (setq ops (cdr ops)))) ; )) ;(create-top-table) ;;; ---------------- Converter utilities (defun tioga-format-looks (looks-str) (let ((bitnum-in-word 31) (char ?a) (result "")) (while (>= bitnum-in-word 0) (let* ((bitnum-in-byte (% bitnum-in-word 8)) (bytenum (- 3 (/ bitnum-in-word 8)))) (if (> (logand (lsh 1 bitnum-in-byte) (aref looks-str bytenum)) 0) (setq result (concat result (char-to-string char))))) (setq bitnum-in-word (- bitnum-in-word 1)) (setq char (+ char 1))) result)) (provide 'tioga)