;;; begincopyright
;;; Copyright (c) 1993 Xerox Corporation. All rights reserved.
;;; Use and copying of this software and preparation of derivative works based
;;; upon this software are permitted. Any distribution of this software or
;;; derivative works must comply with all applicable United States export
;;; control laws. This software is made available AS IS, and Xerox Corporation
;;; makes no warranty about the software, its performance or its conformity to
;;; any specification. Any person obtaining a copy of this software is requested
;;; to send their name and post office or electronic mail address to:
;;; PCR Coordinator
;;; Xerox PARC
;;; 3333 Coyote Hill Rd.
;;; Palo Alto, CA 94304
;;; or pcrcoordinator@parc.xerox.com
;;; endcopyright */
;;; ----------------------------------------------------------------
;;; Modelled after 'viewtioga' by David Nichols.
;;; Bugs to Norman Adams (norman@parc.xerox.com)
;;;
;;; TIOGA-CONVERT-BUFFER walks a buffer containing a tioga document,
;;; and calls client supplied procedures at notable junctures in the
;;; tioga structure. The only entry point in TIOGA-WALK-BUFFER,
;;; a function of 5 arguments.
;;;
;;; Presumably the client implements a conversion form tioga format to
;;; some other format. "tioga-dump.el" is a sample client that prints
;;; the tioga structure in a readable way. "tioga-scheme.el" converts
;;; Tioga-formatted Scheme code to plain text preserving indentation.
;;;
;;; The client supplies 5 callback routines as arguments to
;;; TIOGA-WALK-BUFFER. For the sake of this description, the five
;;; callback routines are (in argument order): start-node, handle-prop,
;;; add-looks, insert-text, and end-node.
;;;
;;; The arguments passes to each of these are as follows:
;;;
;;; (defun start-node (format-name level) ...)
;;; format-name (string) - the name of the format for this node
;;; level (integer) - the nesting level of this node
;;;
;;; (defun handle-prop (prop-name value) ...)
;;; prop-name (string) - the name of a property for this node
;;; value (string) - the value of the property
;;;
;;; (defun add-looks (looks start len) ...)
;;; looks (4 character string) - actually a 32 bit number with
;;; the MSB in the high bit of (aref string 0).
;;; start, len (integers) - the run of this node's text to which to apply
;;; the looks. START is relative to the start of this node's text.
;;;
;;; (defun insert-text (text commentp level) ...)
;;; text (string) - the text for this node
;;; comment-p (boolean) - true is this is a comment node
;;; level (integer) - the next level of this node
;;;
;;; (defun end-node (level) ...)
;;; level (integer) - the nesting level of the node that is ending.
;;;
;;; An idealized node in a tioga document can be described as:
;;; node ::= start-spec property-spec* looks-spec* insert-spec end-spec
;;; start-spec ::= START-OP format-name
;;; property-spec ::= PROP-OP property-name property-value
;;; looks-spec ::= LOOKS-OP looks start len
;;; insert-spec ::= COMMENT-OP text | TEXT-OP text
;;; end-spec ::= END-OP
;;;
;;; A Tioga document is just a node. The root node may contain
;;; properties, but the text for the insert-spec is null.
;;;
;;;
;;; See tioga-dump.el and tioga-scheme.el for sample clients.
;;;
;;; A section at the end of this files defines a few utilites
;;; for converters to use.
;;;
;;; ---------------- from ~nichols/scr/viewtioga/src/tioga.h
; endOfFile = 0,
; startNode,
; /** length of formatName in next byte. Text for format name
; follows that.
; enter in formatName table
; and assign it the next number */
; startNodeFirst,
; startNodeLast = startNodeFirst + tioga←NumFormats,
; /* these opcodes encode previously seen formatName so don't need
; to repeat it find formatName in op-startNodeFirst of format
; table other information follows same as for startNode */
; terminalTextNode,
; terminalTextNodeFirst,
; terminalTextNodeLast = terminalTextNodeFirst + tioga←NumFormats,
; /* these opcodes are for nodes without children so can skip
; endNode opcode identical to startNodeFirst..startNodeLast,
; except implies no children find format name in
; op-startNodeFirst of format table other information follows
; same as for startNode */
; otherNode,
; /** for "other" format of nodes length of formatName in next byte.
; text for format name follows that.
; enter in formatName table
; and assign it the next number */
; otherNodeShort,
; /* like otherNode, but followed by a formatName code number
; instead of length+text */
; otherNodeSpecs,
; /** Gives "variety" and specifications for immediately previous
; "other" format node. Length of variety name follows in next
; byte(s). Then text of name.
; Enter in property name table and assign next number.
; Length of specs text in next byte(s) specs follow that. */
; otherNodeSpecsShort,
; /* like otherNodeSpecs, but followed by a propname code number
; instead of length+text */
; prop,
; /* Specifies property for current node. Property specs are stored
; on file as a rope. Length of property name follows in next
; byte(s). then text of name. Enter in property name table and
; assign next number. Length of property specs rope in next
; byte(s). Followed by text for property specs rope. */
; propShort,
; /* Like prop, but followed by a propname code number instead of
; length+text. */
; endNode,
; /* End current node and go back to adding to its parent. */
; rope,
; /* This op declares rope for most recently started node. Length of
; text for the node in next byte(s). Actual text comes from text
; block followed by a CR which is not included in the length. */
; comment,
; /* Identical to rope except implies text stored in comment area of
; file. */
; runs,
; /* This op preceeds definition of looks for most recently started
; node. Number of runs in following byte(s). Have at most 1 runs
; op per node. if omit, then no special looks. */
; looks,
; /** Looks vector in following 4 bytes.
; Enter vector in looks table.
; and assign it the next number
; Length of run in next byte(s). */
; looksFirst,
; looksLast = looksFirst + tioga←NumLooks,
; /* These ops encode previously encountered looks so don't need to
; repeat. Find looks in looks table[op-looksFirst]. Length of run
; in the next byte(s). */
; look1,
; /* Like looks op, except has single look char instead of 4 byte
; looks vector. */
; look2,
; /* Like looks1, except has two looks chars. */
; look3,
; /* Like look2, except has three looks chars. */
;;; ----------------------------------------------------------------
;;; after ~nichols/scr/viewtioga/src/tioga.h
;;; ---------------- Ops table. Created by create-top-table, below
(defconst top-end-of-file 0) ; 00
(defconst top-start-node 1) ; 01
(defconst top-start-node-first 2) ; 02
(defconst top-start-node-last 72) ; 0110
(defconst top-terminal-text-node 73) ; 0111
(defconst top-terminal-text-node-first 74) ; 0112
(defconst top-terminal-text-node-last 144) ; 0220
(defconst top-other-node 145) ; 0221
(defconst top-other-node-short 146) ; 0222
(defconst top-other-node-specs 147) ; 0223
(defconst top-other-node-specs-short 148) ; 0224
(defconst top-prop 149) ; 0225
(defconst top-prop-short 150) ; 0226
(defconst top-end-node 151) ; 0227
(defconst top-rope 152) ; 0230
(defconst top-comment 153) ; 0231
(defconst top-runs 154) ; 0232
(defconst top-looks 155) ; 0233
(defconst top-looks-first 156) ; 0234
(defconst top-looks-last 206) ; 0316
(defconst top-look1 207) ; 0317
(defconst top-look2 208) ; 0320
(defconst top-look3 209) ; 0321
(defconst tioga←LenLen 4)
(defconst tioga←IDLen 2)
(defconst tioga←CommentHeaderLen (+ tioga←IDLen tioga←LenLen))
(defconst tioga←ControlHeaderLen (+ tioga←IDLen tioga←LenLen))
(defconst tioga←TrailerLen (+ tioga←IDLen ( * 3 tioga←LenLen)))
(defconst trailer-id "\205\227")
(defconst comment-id "\000\000")
(defconst control-id "\235\312")
;;; ----------------------------------------------------------------
;;; after ~nichols/scr/viewtioga/src/tioga.c
(defvar tioga-text-next)
(defvar tioga-text-limit)
(defvar tioga-comment-next)
(defvar tioga-comment-limit)
(defvar tioga-control-next)
(defvar tioga-control-limit)
(defvar tioga-sum-limits)
(defvar tioga-sum-size)
(defvar tioga-progress-factor)
(defun tioga-walk-buffer (tioga-start-node
tioga-handle-prop
tioga-add-looks
tioga-insert-text
tioga-end-node)
(let* ((buffer-len (buffer-size))
(t-base (+ (- buffer-len tioga←TrailerLen) 1))
(t-start (tread-check-id-buffer t-base trailer-id))
(prop-len (tread-get-length-buffer t-start))
(text-len (tread-get-length-buffer (+ t-start 4)))
(total-len (tread-get-length-buffer (+ t-start 8))))
(if (or (not (= total-len buffer-len))
(> prop-len total-len)
(> text-len total-len))
(error "basic tioga sizes are inconsistent"))
(let* ((comment-start (tread-check-id-buffer (+ text-len 1) comment-id))
(comment-len (tread-get-length-buffer comment-start))
(control-start (tread-check-id-buffer (+ text-len comment-len 1)
control-id))
(control-len (tread-get-length-buffer control-start)))
(if (not (= buffer-len (+ text-len comment-len control-len)))
(error "basic tioga part sizes are inconsistent"))
(setq tioga-text-next 1)
(setq tioga-text-limit (+ text-len 1))
(setq tioga-comment-next (+ comment-start tioga←LenLen))
(setq tioga-comment-limit (+ text-len comment-len 1))
(setq tioga-control-next (+ control-start tioga←LenLen))
(setq tioga-control-limit (+ (- buffer-len tioga←TrailerLen) 1))
;; for progress report messages
(setq tioga-sum-limits
(+ tioga-text-limit tioga-control-limit tioga-comment-limit))
(setq tioga-sum-size (+ text-len comment-len control-len))
(cond ((> tioga-sum-size 80000)
(setq tioga-sum-size (/ tioga-sum-size 100))
(setq tioga-progress-factor 1))
(t
(setq tioga-progress-factor 100)))
(tioga-walk-loop)
)))
;;; ----------------
(defvar tioga-looks-count)
(defvar tioga-looks-table)
(defvar tioga-format-count)
(defvar tioga-format-table)
(defvar tioga-prop-count)
(defvar tioga-prop-table)
(defun tioga-init-tables ()
(setq tioga-looks-count 1)
(setq tioga-looks-table '(("\000\000\000\000" . 0)))
(setq tioga-format-count 1)
(setq tioga-format-table '(("" . 0)))
(setq tioga-prop-count 1)
(setq tioga-prop-table '(("" . 0)))
(tread-prop-index "prefix")
(tread-prop-index "postfix")
)
(defun tioga-walk-loop ()
(let ((op (tread-get-op))
(terminal-node nil)
(last-was-terminal nil)
(format-index nil)
(prop-index nil)
(level -1) ; first start-node bumps to zero ...
(run-len 0)
(len 'len-unspec)
(done nil))
(tioga-init-tables)
(while (not done)
(cond ((and (<= top-terminal-text-node-first op)
(<= op top-terminal-text-node-last))
(setq terminal-node t)
(setq format-index (- op top-terminal-text-node-first))
(work-loop-case--what-to-do-on-break))
((and (<= top-start-node-first op)
(<= op top-start-node-last))
(setq terminal-node nil)
(setq format-index (- op top-start-node-first))
(work-loop-case--what-to-do-on-break))
((= op top-end-node)
;; todo
(tioga-do-end-node)
(setq op (tread-get-op)))
((or (= op top-start-node) (= op top-terminal-text-node))
(let ((str (tread-get-str)))
(setq format-index (tread-format-index str))
(setq terminal-node (= op top-terminal-text-node))
(work-loop-case--what-to-do-on-break)))
((or (= op top-rope) (= op top-comment))
;; Get newline, just don't pass it to client.
(let* ((length (tread-get-int))
(str (if (= op top-rope)
(tread-span-text (+ length 1))
(tread-span-comment (+ length 1)))))
;; Convert newlines if should.
;; FixNewlines(r->str, length + 1) ; ****
(tioga-progress-report)
(funcall tioga-insert-text str (= op top-comment) level)
(if (and (not (= run-len 0))
(not (= run-len length)))
(tioga-error
"Rope length(%d) doesn't match run length(%d)\n"
length run-len))
(setq run-len 0)
(setq op (tread-get-op))))
((= op top-runs)
(let ((nRuns (tread-get-int))
(i 0))
(setq run-len 0)
(while (< i nRuns)
(let ((look-index 'foo))
(setq op (tread-get-op))
(cond ((and (<= top-looks-first op)
(<= op top-looks-last))
(setq look-index (- op top-looks-first)))
((and (<= top-look1 op) (<= op top-look3))
(setq look-index
(tread-look-chars-to-index
(+ (- op top-look1) 1))))
((= op top-looks)
(let ((looks-str (tread-get-looks-str)))
(setq look-index (tread-looks-index looks-str))))
(t
(tioga-error "no looks in run")))
(cond ((>= look-index tioga-looks-count)
(tioga-error "Look index(%d) too large.\n" look-index)
(setq look-index 0)))
(let ((rl (tread-get-int)))
(funcall tioga-add-looks (tread-get-looks look-index)
run-len rl)
(setq run-len (+ run-len rl))
(setq i (+ i 1)))))
(setq op (tread-get-op))))
((= op top-prop)
(let ((str (tread-get-str)))
(setq prop-index (tread-prop-index str))
(let* ((len (tread-get-int))
(str (tread-span-control len)))
(funcall tioga-handle-prop (tread-get-prop prop-index) str)
(setq op (tread-get-op)))))
((= op top-prop-short)
(setq prop-index (tread-get-byte))
(let* ((len (tread-get-int))
(str (tread-span-control len)))
(funcall tioga-handle-prop (tread-get-prop prop-index) str)
(setq op (tread-get-op))))
((= op top-end-of-file)
(cond ((= level 0)
(work-loop-case--what-to-do-on-break))
(t
;; Supply missing endNode ops.
(setq op top-endNode))))
;;((= op top-other-node) ...)
;;((= op top-other-node-short) ...)
;;((= op top-other-node-specs) ...)
;;((= op top-other-node-specs-short) ...)
(t
(tioga-error "Illegal op code: %d\n" op)
(setq op (tread-get-op)))
))))
(defun tioga-do-end-node ()
(cond ((>= level 0)
(setq level (- level 1)))
(t
(tioga-error "Too many endNodes.\n")))
(funcall tioga-end-node (+ level 1)))
;; This relies on dynamic scoping.
(defun work-loop-case--what-to-do-on-break ()
(cond ((and (= level 0) (= op top-end-of-file))
(setq done t))
(t ;; If we make it here, then we want to start a new text node.
(if last-was-terminal
(tioga-do-end-node))
(setq last-was-terminal terminal-node)
(setq level (+ level 1))
(funcall tioga-start-node (tread-get-format format-index) level)
(setq op (tread-get-op)))))
(defun tioga-progress-report ()
(let ((remains
(- tioga-sum-limits
(+ tioga-text-next tioga-control-next tioga-comment-next))))
(message
(concat "Tioga: "
(- 100 (/ (* tioga-progress-factor remains)
tioga-sum-size))
"%% converted."))
(sit-for 0)))
;;; ----------------------------------------------------------------
;;; after ~nichols/scr/viewtioga/src/tread.c
;;; ---------------- hacking tioga file sections
;;; ---- length field
(defun tread-get-length-buffer (pos)
(tread-get-length (buffer-substring pos (+ pos 4)) 0))
(defun tread-get-length (str pos)
(+ (string-to-char (substring str (+ pos 1) (+ pos 2)))
(lsh (string-to-char (substring str pos (+ pos 1))) 8)
(lsh (string-to-char (substring str (+ pos 2) (+ pos 3))) 24)
(lsh (string-to-char (substring str (+ pos 3) (+ pos 4))) 16)))
;;; ---- check section id
(defun tread-check-id-buffer (pos id)
(let ((l (length id)))
(tread-check-id (buffer-substring pos (+ pos l)) 0 id)
(+ pos l)))
(defun tread-check-id (str pos id)
(if (not (string= (substring str pos (+ pos (length id)))
id))
(error "tread-check-id failed")))
;;; ---------------- Control stream fetchers
;;; ---- op byte
(defun tread-get-op ()
(cond ((< tioga-control-next tioga-control-limit)
(let ((c (char-after tioga-control-next)))
(setq tioga-control-next (+ tioga-control-next 1))
c))
(t top-end-of-file)))
;;; ---- byte
(defun tread-get-byte ()
(cond ((< tioga-control-next tioga-control-limit)
(let ((pos tioga-control-next))
(setq tioga-control-next (+ pos 1))
(string-to-char (buffer-substring pos tioga-control-next))))
;; (tioga-error "eof get control byte")
(t 0)))
;;; ---- integer
;;; need bignums? (elisp has 24 bit fixnums)
(defun tread-get-int ()
(let ((nbits 0)
(result 0)
(done nil))
(while (not done)
(let ((byte (char-after tioga-control-next)))
(setq tioga-control-next (+ tioga-control-next 1))
(setq done (< byte 128))
(let ((byte-low (logand byte 127)))
(if (or (> nbits 21)
(and (= nbits 21) (> byte-low 3)))
(tioga-error "(NYI) more than 23 bits of length"))
(setq result (logior result (lsh byte-low nbits)))
(setq nbits (+ nbits 7)))))
result))
;;; ---- string
(defun tread-get-str ()
(cond ((< tioga-control-next tioga-control-limit)
(let ((len (char-after tioga-control-next)))
(setq tioga-control-next (+ tioga-control-next 1))
(tread-span-control len)))
(t (tioga-error "eof getting string length"))))
;;; ---------------- fetch bytes from one of the 3 sections
;;; ---- control
(defun tread-span-control (len)
(cond ((> (+ tioga-control-next len) tioga-control-limit)
(tioga-error "eof getting rope from control"))
(t
(let ((pos tioga-control-next))
(setq tioga-control-next (+ tioga-control-next len))
(buffer-substring pos tioga-control-next)))))
;;; ---- comment
(defun tread-span-comment (len)
(cond ((> (+ tioga-comment-next len) tioga-comment-limit)
(tioga-error "eof getting rope from comment"))
(t
(let ((pos tioga-comment-next))
(setq tioga-comment-next (+ tioga-comment-next len))
(buffer-substring pos tioga-comment-next)))))
;;; ---- text
(defun tread-span-text (len)
(cond ((> (+ tioga-text-next len) tioga-text-limit)
(tioga-error "eof getting rope from text"))
(t
(let ((pos tioga-text-next))
(setq tioga-text-next (+ tioga-text-next len))
(buffer-substring pos tioga-text-next)))))
;;; ---------------- Looks
;;; Get 4 bytes of looks, MSB is high bit of the first character of string
(defun tread-get-looks-str ()
(let ((pos tioga-control-next))
(setq tioga-control-next (+ pos 4))
(cond ((<= tioga-control-next tioga-control-limit)
(buffer-substring pos tioga-control-next))
(t
(tioga-error "eof getting looks string")))))
;;; Convert looks characters to a looks vector
;;; "a" is MSB
(defun tread-look-chars-to-index (n)
(let ((result (make-string 4 0)))
(while (> n 0)
(let ((c (tread-get-byte)))
(cond ((and (>= c ?a) (<= c (+ ?a 31)))
(let* ((bitnum-in-word (- 31 (- c ?a)))
(bitnum-in-byte (% bitnum-in-word 8))
(bytenum (- 3 (/ bitnum-in-word 8))))
(aset result bytenum
(logior (aref result bytenum) (lsh 1 bitnum-in-byte)))))
(t
(tioga-error "Illegal look character: %d" c))))
(setq n (- n 1)))
(tread-looks-index result)))
;;; Register a looks vector, return index
(defun tread-looks-index (looks-str)
(cond ((string= looks-str "\000\000\000\000")
0)
(t
(let ((probe (assoc looks-str tioga-looks-table)))
(cond ((null probe)
(setq tioga-looks-table
(cons (cons looks-str tioga-looks-count)
tioga-looks-table))
(prog1
tioga-looks-count
(setq tioga-looks-count (+ tioga-looks-count 1))))
(t
(cdr probe)))))))
;;; Get a looks vector given an index
(defun tread-get-looks (index)
(let ((probe (rassq index tioga-looks-table)))
(cond (probe (car probe))
(t
(tioga-error "invalid looks index (%d)" index)))))
;;; ---------------- Format
;;; Add a new format, and return its index.
(defun tread-format-index (format-str)
(cond ((string= format-str "")
0)
(t
(let ((probe (assoc format-str tioga-format-table)))
(cond ((null probe)
(setq tioga-format-table
(cons (cons format-str tioga-format-count)
tioga-format-table))
(prog1
tioga-format-count
(setq tioga-format-count (+ tioga-format-count 1))))
(t
(cdr probe)))))))
;;; Get a format given an index.
(defun tread-get-format (index)
(let ((probe (rassq index tioga-format-table)))
(cond (probe (car probe))
(t
(tioga-error "invalid format index (%d)" index)))))
;;; ---------------- Properties
;;; Add a new prop, and return its index.
(defun tread-prop-index (prop-str)
(cond ((string= prop-str "")
0)
(t
(let ((probe (assoc prop-str tioga-prop-table)))
(cond ((null probe)
(setq tioga-prop-table
(cons (cons (downcase prop-str) tioga-prop-count)
tioga-prop-table))
(prog1
tioga-prop-count
(setq tioga-prop-count (+ tioga-prop-count 1))))
(t
(cdr probe)))))))
;;; Get a property given an index.
(defun tread-get-prop (index)
(let ((probe (rassq index tioga-prop-table)))
(cond (probe (car probe))
(t
(tioga-error "invalid property index (%d)" index)))))
;;; ---------------- Errors
(defun tioga-error (str &rest args)
(apply (function error)
(cons (concat "Error converting Tioga: " str) args)))
;;; ---------------- Development aids
;(defun create-top-table ()
; (let ((ops '(end-of-file start-node start-node-first (start-node-last 70)
; terminal-text-node terminal-text-node-first
; (terminal-text-node-last 70) other-node
; other-node-short other-node-specs
; other-node-specs-short prop prop-short end-node rope
; comment runs looks looks-first (looks-last 50) look1
; look2 look3))
; (num -1))
; (while ops
; (let ((op (car ops))
; (incr 1))
; (cond ((consp op)
; (setq incr (car (cdr op)))
; (setq op (car op))))
; (setq op (intern (concat "top-" (symbol-name op))))
; (set op (setq num (+ num incr)))
; (insert (format "\n(defconst %s %d) ; 0%o"
; op (symbol-value op) (symbol-value op)))
; (beginning-of-line)
; (indent-sexp)
; (end-of-line)
; (setq ops (cdr ops))))
; ))
;(create-top-table)
;;; ---------------- Converter utilities
(defun tioga-format-looks (looks-str)
(let ((bitnum-in-word 31)
(char ?a)
(result ""))
(while (>= bitnum-in-word 0)
(let* ((bitnum-in-byte (% bitnum-in-word 8))
(bytenum (- 3 (/ bitnum-in-word 8))))
(if (> (logand (lsh 1 bitnum-in-byte) (aref looks-str bytenum)) 0)
(setq result
(concat result (char-to-string char)))))
(setq bitnum-in-word (- bitnum-in-word 1))
(setq char (+ char 1)))
result))
(provide 'tioga)