(DEFINE-FILE-INFO READTABLE "XCL" PACKAGE (DEFPACKAGE "HASH-FILE" (USE "LISP" "XCL") (IMPORT 
WITH-READER-ENVIRONMENT MAKE-READER-ENVIRONMENT FIND-READTABLE UNINTERRUPTABLY WITH.MONITOR 
CREATE.MONITORLOCK MONITORLOCK)))
(il:filecreated " 9-Oct-87 11:21:39" il:|{POGO:AISNORTH:XEROX}<CUTTING>LISP>HASH-FILE.;3| 22840  

      il:|changes| il:|to:|  (il:vars il:hash-filecoms) (il:props (il:hash-file il:makefile-environment))
 (il:variables *reader-environment*)

      il:|previous| il:|date:| " 8-Oct-87 16:26:26" il:|{POGO:AISNORTH:XEROX}<CUTTING>LISP>HASH-FILE.;2|
)


; Copyright (c) 1987 by Xerox Corporation.  All rights reserved.

(il:prettycomprint il:hash-filecoms)

(il:rpaqq il:hash-filecoms ((il:p (provide "HASH-FILE") (export (quote (make-hash-file open-hash-file close-hash-file copy-hash-file map-hash-file get-hash-file rem-hash-file hash-file-p hash-file-count hash-file)) "HASH-FILE")) (il:structures hash-file) (il:functions %print-hash-file) (il:variables bits-per-byte bytes-per-pointer size-position count-position table-position the-null-pointer) (il:coms (il:* il:|;;;| "public code") (il:functions make-hash-file open-hash-file close-hash-file copy-hash-file map-hash-file get-hash-file put-hash-file rem-hash-file) (il:setfs get-hash-file) (il:variables *delete-old-version-on-rehash* *rehash-size* *rehash-threshold*) (il:* il:|;;;| "internal code") (il:functions rehash? rehash key->table-pointer add-entry ensure-stream-is-open next-prime write-size read-size write-count read-count write-pointer read-pointer null-pointer?) (il:* il:|;;| "conveniences") (il:functions histogram convert)) (il:coms (il:* il:|;;;| "default user code") (il:functions hash-object hash-object-internal combine) (il:variables *hash-depth*) (il:functions default-read-fn default-print-fn) (il:variables *reader-environment*)) (il:prop (il:filetype il:makefile-environment) il:hash-file))
)
(provide "HASH-FILE")
(export (quote (make-hash-file open-hash-file close-hash-file copy-hash-file map-hash-file get-hash-file rem-hash-file hash-file-p hash-file-count hash-file)) "HASH-FILE")
(defstruct (hash-file (:copier copy-hash-file-internal) (:constructor make-hash-file-internal) (:print-function %print-hash-file)) "Like a hash-table but on a file instead of in memory" (stream nil :type stream) (il:* il:|;;| "open stream on the backing file") (direction :input :type (member :input :io)) (il:* il:|;;| "the direction that stream is open in") (monitor (create.monitorlock "HASH-FILE") :type monitorlock) (il:* il:|;;| "should always be obtained before changing STREAM's position") (size nil :type integer) (il:* il:|;;| "size of the table -- determines the range for key hashing") (count 0 :type :integer) (il:* il:|;;| "number of entries currently in the hash file") (rehash-size *rehash-size* :type float) (il:* il:|;;| "factor to increase size by when re-hashing") (rehash-threshold *rehash-threshold* :type float) (il:* il:|;;| "rehash when (= ENTRIES (* SIZE REHASH-THRESHOLD)") (key-print-fn (quote default-print-fn) :type function) (il:* il:|;;| "called with KEY and STREAM to write keys on the file") (key-read-fn (quote default-read-fn) :type function) (il:* il:|;;| "called with STREAM to read a key from the file") (key-hash-fn (quote hash-object) :type function) (il:* il:|;;| "called with KEY and SIZE to obtain an integer in {0 .. SIZE-1}") (key-compare-fn (quote equal) :type function) (il:* il:|;;| "called with two keys with same hash value to resolve collisions") (value-print-fn (quote default-print-fn) :type function) (il:* il:|;;| "called with VALUE and STREAM to print values on file") (value-read-fn (quote default-read-fn) :type function) (il:* il:|;;| "called with STREAM to read a value from the file"))
(defun %print-hash-file (hash-file stream depth) (format stream "#<Hash-File on ~A>" (let* ((stream (hash-file-stream hash-file)) (namestring (namestring (pathname stream)))) (if namestring namestring stream))))
(defconstant bits-per-byte 8)
(defconstant bytes-per-pointer 4)
(defconstant size-position (* bytes-per-pointer 0))
(defconstant count-position (* bytes-per-pointer 1))
(defconstant table-position (* bytes-per-pointer 2))
(defconstant the-null-pointer 0)



(il:* il:|;;;| "public code")

(defun make-hash-file (il:* il:|;;| "MAKE-HASH-TABLE equivalent for hash files") (il:* il:|;;| "creates and returns a new hash file.") (file-name size &key (rehash-size *rehash-size*) (rehash-threshold *rehash-threshold*) (key-print-fn (quote default-print-fn)) (key-read-fn (quote default-read-fn)) (key-compare-fn (quote equal)) (key-hash-fn (quote hash-object)) (value-print-fn (quote default-print-fn)) (value-read-fn (quote default-read-fn))) (let ((stream (open file-name :direction :io :if-exists :new-version :element-type (il:bquote (unsigned-byte (il:\\\, bits-per-byte))))) (real-size (next-prime size))) (il:* il:|;;| "write the size & entries") (write-size real-size stream) (write-count 0 stream) (il:* il:|;;| "initialize table -- fill it with null pointers") (dotimes (n real-size) (write-pointer the-null-pointer stream)) (il:* il:|;;| "make & return a HASH-FILE structure") (make-hash-file-internal :stream stream :direction :io :size real-size :count 0 :rehash-size rehash-size :rehash-threshold rehash-threshold :key-print-fn key-print-fn :key-read-fn key-read-fn :key-compare-fn key-compare-fn :key-hash-fn key-hash-fn :value-print-fn value-print-fn :value-read-fn value-read-fn)))
(defun open-hash-file (il:* il:|;;| "open an existing hash file") (file-name &key (direction :input) (rehash-size *rehash-size*) (rehash-threshold *rehash-threshold*) (key-print-fn (quote default-print-fn)) (key-read-fn (quote default-read-fn)) (key-compare-fn (quote equal)) (key-hash-fn (quote hash-object)) (value-print-fn (quote default-print-fn)) (value-read-fn (quote default-read-fn))) (case direction ((:input :io)) (otherwise (error "~S illegal arg.  Must be :INPUT or :IO" direction))) (let ((stream (open file-name :direction direction :if-exists :overwrite :element-type (il:bquote (unsigned-byte (il:\\\, bits-per-byte)))))) (il:* il:|;;| "make & return a HASH-FILE structure") (make-hash-file-internal :stream stream :direction direction :size (read-size stream) :count (read-count stream) :rehash-size rehash-size :rehash-threshold rehash-threshold :key-print-fn key-print-fn :key-read-fn key-read-fn :key-compare-fn key-compare-fn :key-hash-fn key-hash-fn :value-print-fn value-print-fn :value-read-fn value-read-fn)))
(defun close-hash-file (hash-file &key abort) (il:* il:|;;| "close the stream") (with.monitor (hash-file-monitor hash-file) (let ((stream (hash-file-stream hash-file))) (close stream :abort abort) (pathname stream))))
(defun copy-hash-file (old-hash-file new-file-name &optional (new-size nil new-size-specified?)) (il:* il:|;;| "make a new hashfile in NEW-FILE-NAME with the same contents as OLD-HASH-FILE.  this will reclaim space lost in OLD-HASH-FILE.  also used by REHASH.  ") (let ((new-hash-file (make-hash-file new-file-name (if new-size-specified? new-size (il:* il:|;;| "default NEW-SIZE to the size of OLD-HASH-FILE") (hash-file-size old-hash-file)) (il:* il:|;;| "sure wish common lisp had a \"using\" construct...") :rehash-size (hash-file-rehash-size old-hash-file) :rehash-threshold (hash-file-rehash-threshold old-hash-file) :key-print-fn (hash-file-key-print-fn old-hash-file) :key-read-fn (hash-file-key-read-fn old-hash-file) :key-compare-fn (hash-file-key-compare-fn old-hash-file) :key-hash-fn (hash-file-key-hash-fn old-hash-file) :value-print-fn (hash-file-value-print-fn old-hash-file) :value-read-fn (hash-file-value-read-fn old-hash-file)))) (map-hash-file (function (lambda (key value) (setf (get-hash-file key new-hash-file) value))) old-hash-file) (il:* il:|;;| "write it out for safety") (close-hash-file new-hash-file) (il:* il:|;;| "return the new hash file") new-hash-file))
(defun map-hash-file (fn hash-file) (il:* il:|;;| "calls FN on every KEY & VALUE pair in HASH-FILE") (with.monitor (hash-file-monitor hash-file) (let* ((stream (ensure-stream-is-open hash-file)) (size (hash-file-size hash-file)) (last-pointer (+ table-position (* bytes-per-pointer (1- size)))) next-pointer) (il:* il:|;;| "loop over table") (do ((table-pointer table-position (+ table-pointer bytes-per-pointer))) ((> table-pointer last-pointer)) (il:* il:|;;| "loop down bucket") (do ((pointer (read-pointer stream table-pointer) next-pointer)) ((null-pointer? pointer) (il:* il:\; "end of bucket or empty bucket")) (il:* il:|;;| "read & save next pointer") (setq next-pointer (read-pointer stream pointer)) (il:* il:|;;| "call FN on KEY and VALUE read from file") (funcall fn (funcall (hash-file-key-read-fn hash-file) stream) (funcall (hash-file-value-read-fn hash-file) stream)))))))
(defun get-hash-file (key hash-file &optional (default nil)) (il:* il:|;;| "GETHASH for hash files") (il:* il:|;;| "returns the value stored under KEY in HASH-FILE, or DEFAULT if there is no value stored.  second value is T iff a value was found") (with.monitor (hash-file-monitor hash-file) (let ((stream (ensure-stream-is-open hash-file)) next-pointer) (il:* il:|;;| "loop down linked list in bucket") (do ((pointer (read-pointer stream (key->table-pointer key hash-file)) next-pointer)) ((null-pointer? pointer) (il:* il:|;;| "end of bucket (or empty bucket) - we lost") (values default nil)) (il:* il:|;;| "read & save next pointer") (setq next-pointer (read-pointer stream pointer)) (when (il:* il:|;;| "read key from file and compare with KEY ") (funcall (hash-file-key-compare-fn hash-file) key (funcall (hash-file-key-read-fn hash-file) stream)) (il:* il:|;;| "they match -- we won!") (return (il:* il:|;;| "read & return value") (values (funcall (hash-file-value-read-fn hash-file) stream) t)))))))
(defun put-hash-file (key hash-file value) (il:* il:|;;| "SETF method for GET-HASH-FILE") (il:* il:|;;| "stores a VALUE under KEY in HASH-FILE") (with.monitor (hash-file-monitor hash-file) (let ((table-pointer (key->table-pointer key hash-file)) (stream (ensure-stream-is-open hash-file)) next-pointer) (il:* il:|;;| "loop down bucket") (do* ((last-pointer table-pointer pointer) (il:* il:|;;| "LAST-POINTER is location of POINTER") (pointer (read-pointer stream table-pointer) next-pointer)) ((null-pointer? pointer) (il:* il:|;;| "end of bucket (or empty bucket) - nothing hashed under this key") (il:* il:|;;| "time to add a new entry to the hash file ") (cond ((rehash? hash-file) (il:* il:|;;| "pointers are off if we rehashed -- have to start over") (put-hash-file key hash-file value)) (t (il:* il:|;;| "just nconc a new entry onto the end of the bucket") (add-entry hash-file key value last-pointer the-null-pointer) (il:* il:|;;| "increment and write out the count of objects") (write-count (incf (hash-file-count hash-file)) stream)))) (il:* il:|;;| "read & save the pointer to next in bucket") (setq next-pointer (read-pointer stream pointer)) (when (il:* il:|;;| "read key from file & compare with KEY") (funcall (hash-file-key-compare-fn hash-file) key (funcall (hash-file-key-read-fn hash-file) stream)) (il:* il:|;;| "they match - already something hashed under this key") (il:* il:|;;| "splice new entry into bucket, old entry out") (add-entry hash-file key value last-pointer next-pointer) (return))) (il:* il:|;;| "return VALUE") value)))
(defun rem-hash-file (key hash-file) (il:* il:|;;| "REMHASH for hash files") (il:* il:|;;| "removes the entry (if any)  for KEY from HASH-FILE.  returns T if there was one to remove.  ") (with.monitor (hash-file-monitor hash-file) (let ((table-pointer (key->table-pointer key hash-file)) (stream (ensure-stream-is-open hash-file)) next-pointer) (il:* il:|;;| "loop down bucket") (do* ((last-pointer table-pointer pointer) (il:* il:|;;| "LAST-POINTER is location of POINTER") (pointer (read-pointer stream table-pointer) next-pointer)) ((null-pointer? pointer) (il:* il:|;;| "end of bucket (or empty bucket) - nothing hashed under this key") (quote nil)) (il:* il:|;;| "read & save the pointer to next in bucket") (setq next-pointer (read-pointer stream pointer)) (when (il:* il:|;;| "read key from file & compare with KEY") (funcall (hash-file-key-compare-fn hash-file) key (funcall (hash-file-key-read-fn hash-file) stream)) (il:* il:|;;| "they match") (il:* il:|;;| "smash NEXT-POINTER into LAST-POINTER") (write-pointer next-pointer stream last-pointer) (il:* il:|;;| "decrement the count of entries in HASH-FILE") (write-count (decf (hash-file-count hash-file)) stream) (return (quote t)))))))
(defsetf get-hash-file put-hash-file)
(defvar *delete-old-version-on-rehash* nil "if non-NIL then delete the old version of a hash file when rehashing")
(defvar *rehash-size* 2.0 "default REHASH-SIZE for hash files")
(defvar *rehash-threshold* 0.875 "default REHASH-THRESHOLD for hash files")



(il:* il:|;;;| "internal code")

(defun rehash? (hash-file) (il:* il:|;;| "check if it's time to rehash HASH-FILE.  if it is, then do so and return non-NIL") (when (>= (1+ (hash-file-count hash-file)) (* (hash-file-size hash-file) (hash-file-rehash-threshold hash-file))) (rehash hash-file (round (* (hash-file-size hash-file) (hash-file-rehash-size hash-file)))) t))
(defun rehash (hash-file new-size) (il:* il:|;;| "caution: assumes we're under hash file monitor") (let* ((old-pathname (pathname (hash-file-stream hash-file))) (temp-hash-file (copy-hash-file hash-file (make-pathname :version :newest :defaults (pathname old-pathname)) new-size))) (il:* il:|;;| "close the old stream (before we lose pointer to it)") (close-hash-file hash-file) (il:* il:|;;| "smash TEMP-HASH-FILE into HASH-FILE") (uninterruptably (setf (hash-file-size hash-file) (il:* il:\; "note: probably not the same as NEW-SIZE") (hash-file-size temp-hash-file)) (setf (hash-file-count hash-file) (hash-file-count temp-hash-file)) (setf (hash-file-stream hash-file) (hash-file-stream temp-hash-file))) (il:* il:|;;| "our caller [PUT-HASH-FILE] expects the stream to be open") (ensure-stream-is-open hash-file) (if *delete-old-version-on-rehash* (delete-file old-pathname)) (il:* il:|;;| "return the hash file") hash-file))
(defmacro key->table-pointer (key hash-file) (il:* il:|;;| "return the file position for the head of the bucket which key hashes into.  this is the guy who does the hashing.  ") (il:* il:|;;| "caution: HASH-FILE is evaluated twice") (il:bquote (+ table-position (* bytes-per-pointer (funcall (hash-file-key-hash-fn (il:\\\, hash-file)) (il:\\\, key) (hash-file-size (il:\\\, hash-file)))))))
(defun add-entry (hash-file key value last-pointer link-pointer) (il:* il:|;;| "write an entry at end of  file, putting a pointer to it in LAST-POINTER and make it point to LINK-POINTER as next in bucket.") (il:* il:|;;| "caution: we presume we've got the hash-file-monitor.") (let* ((stream (hash-file-stream hash-file)) (eof-pointer (file-length stream))) (il:* il:|;;| "first overwrite LAST-POINTER with a pointer to EOF ") (write-pointer eof-pointer stream last-pointer) (il:* il:|;;| "write link to next bucket ") (write-pointer link-pointer stream eof-pointer) (il:* il:|;;| "write the key") (funcall (hash-file-key-print-fn hash-file) key stream) (il:* il:|;;| "write the value") (funcall (hash-file-value-print-fn hash-file) value stream) (il:* il:|;;| "return value") value))
(defun ensure-stream-is-open (hash-file) (il:* il:|;;| "makes sure HASH-FILE's stream is open") (il:* il:|;;| "caution: assumes we're under hash file monitor") (let ((stream (hash-file-stream hash-file))) (if (open-stream-p stream) stream (setf (hash-file-stream hash-file) (open stream :direction (hash-file-direction hash-file) :if-exists :overwrite)))))
(defun next-prime (n) (il:* il:|;;| "return the next prime number greater than N") (il:* il:|;;| "algorithm stolen from CDL's FIND1STPRIME in old HASH library") (let (found?) (do ((p (logior n 1) (+ p 2))) ((do* ((i 3 (+ i 2))) ((or (and (< i p) (zerop (rem p i))) (setq found? (< p (* i i)))) found?)) p))))
(defun write-size (size stream) (il:* il:|;;| "write SIZE to file as a pointer sized number") (write-pointer size stream size-position))
(defun read-size (stream) (il:* il:|;;| "read size from file as written by WRITE-SIZE") (read-pointer stream size-position))
(defun write-count (count stream) (il:* il:|;;| "write COUNT to file as a pointer sized number") (write-pointer count stream count-position))
(defun read-count (stream) (il:* il:|;;| "read count as written by WRITE-COUNT") (read-pointer stream count-position))
(defun write-pointer (pointer stream &optional position) (il:* il:|;;| "write POINTER (a non-negative integer) as BYTES-PER-POINTER bytes on STREAM s.t. READ-POINTER can reconstruct it.  if POSITION is specified then set STREAM's file position to it first.") (when (> (integer-length pointer) (* bytes-per-pointer bits-per-byte)) (error "~S : pointer too large" pointer)) (when position (file-position stream position)) (dotimes (n bytes-per-pointer) (write-byte (ldb (byte bits-per-byte (* n bits-per-byte)) pointer) stream)) (il:* il:|;;| "return POINTER") pointer)
(defun read-pointer (stream &optional position) (il:* il:|;;| "read from STREAM a positive integer written by WRITE-POINTER.  if POSITION is specified the file position will be set to it first.") (il:* il:|;;| "read BYTES-PER-POINTER bytes from stream and return them as an integer.  this is the inverse of WRITE-P ") (when position (file-position stream position)) (let ((value 0) byte) (dotimes (n bytes-per-pointer) (setq byte (read-byte stream)) (when (not (zerop byte)) (il:* il:|;;| "optimization: DPB is really slow w/ high bytes") (setq value (dpb byte (byte bits-per-byte (* n bits-per-byte)) value)))) value))
(defmacro null-pointer? (pointer) (il:bquote (eql (il:\\\, pointer) the-null-pointer)))



(il:* il:|;;| "conveniences")

(defun histogram (hash-file) (il:* il:|;;| "return an ALIST of bucket depths dotted with number of occurences") (with.monitor (hash-file-monitor hash-file) (let* ((stream (ensure-stream-is-open hash-file)) (size (hash-file-size hash-file)) (last-pointer (+ table-position (* bytes-per-pointer (1- size)))) next-pointer result) (il:* il:|;;| "loop over table") (do ((table-pointer table-position (+ table-pointer bytes-per-pointer))) ((> table-pointer last-pointer)) (il:* il:|;;| "loop down bucket") (do ((pointer (read-pointer stream table-pointer) next-pointer) (bucket-length 0 (1+ bucket-length))) ((null-pointer? pointer) (il:* il:|;;| "end of bucket or empty bucket") (il:* il:|;;| "increment count for buckets of this length") (incf (cdr (or (assoc bucket-length result) (car (push (cons bucket-length 0) result)))))) (setq next-pointer (read-pointer stream pointer)))) (sort result (function (lambda (pair-1 pair-2) (< (car pair-1) (car pair-2))))))))
(defun convert (il-hash-file cl-hash-file) "convert a HASH hash file into a HASH-FILE hash file" (il:* il:|;;| "first make sure HASH is loaded") (il:filesload (il:sysload il:from il:lispusers) hash) (let* ((old-hash-file (il:openhashfile il-hash-file)) (new-hash-file (make-hash-file cl-hash-file (il:hashfileprop old-hash-file (quote il:size)))) (abort (quote t))) (unwind-protect (progn (il:maphashfile old-hash-file (function (lambda (key value) (put-hash-file key new-hash-file value)))) (setq abort (quote nil))) (il:closehashfile old-hash-file) (close-hash-file new-hash-file :abort abort))))



(il:* il:|;;;| "default user code")

(defun hash-object (object range) (il:* il:|;;;| "return an integer between 0 and (1- RANGE), inclusive") (il:* il:|;;;| "objects which are EQUAL will return the same integer") (1- (hash-object-internal object range 0)))
(defun hash-object-internal (object range depth) (il:* il:|;;| "recursively descend OBJECT, combining characters & integers at leaves with multiplication modulo RANGE.  never descend more than *HASH-DEPTH* into a structure.") (il:* il:|;;| "return an integer between 1 and RANGE, inclusive") (if (eql depth *hash-depth*) 1 (typecase object (string (let ((value 1) (length (length object))) (dotimes (n (min length (- *hash-depth* depth)) (combine range value length)) (setf value (combine range value (char-code (char object n))))))) (symbol (il:* il:|;;| "combine hash values of name and package name") (combine range (hash-object-internal (package-name (symbol-package object)) range (1+ depth)) (hash-object-internal (symbol-name object) range (1+ depth)))) (cons (il:* il:|;;| "combine hash values of CAR and CDR") (combine range (hash-object-internal (car object) range (1+ depth)) (hash-object-internal (cdr object) range (1+ depth)))) (number (typecase object (integer (combine range (abs object))) (float (multiple-value-bind (sig expon) (integer-decode-float object) (combine range sig (abs expon)))) (ratio (combine range (abs (numerator object)) (denominator object))) (complex (combine range (hash-object-internal (realpart object) range (1+ depth)) (hash-object-internal (imagpart object) range (1+ depth)))))) (character (combine range (char-code object))) (pathname (hash-object-internal (namestring object) range (1+ depth))) (bit-vector (let ((value 1) (length (length object))) (dotimes (n (min length (- *hash-depth* depth)) (combine range value length)) (setf value (combine range value (if (zerop (bit object n)) 0 (expt 2 n))))))) (t (il:* il:|;;| "can't dependably read/print other objects") (error "Can't hash a(n) ~S" (type-of object))))))
(defmacro combine (range &rest integers) (il:* il:|;;;| "combine non-negative integers returning an integer between 1 and RANGE inclusive (zeros are bad when combining with multiplication).  we don't do the obvious (1+ (mod (* . integers) range)) to avoid making bignums. ") (il:* il:|;;;| " caution: RANGE may be evaluated many times.") (il:bquote (1+ (mod (il:\\\, (if (endp (rest integers)) (first integers) (il:bquote (* (il:\\\, (first integers)) (combine (il:\\\, range) (il:\\\,@ (rest integers))))))) (il:\\\, range)))))
(defvar *hash-depth* 17)
(defun default-read-fn (stream) (il:* il:|;;| "default reader for hash files") (with-reader-environment *reader-environment* (read stream)))
(defun default-print-fn (object stream) (il:* il:|;;| "default printer for hash files") (with-reader-environment *reader-environment* (let ((*print-pretty* (quote nil))) (print object stream))) object)
(defvar *reader-environment* (make-reader-environment (find-package "XCL") (find-readtable "XCL") 10))

(il:putprops il:hash-file il:filetype :compile-file)

(il:putprops il:hash-file il:makefile-environment (:readtable "XCL" :package (defpackage "HASH-FILE" (:use "LISP" "XCL") (:import with-reader-environment make-reader-environment find-readtable uninterruptably with.monitor create.monitorlock monitorlock)))
)
(il:putprops il:hash-file il:copyright ("Xerox Corporation" 1987))
(il:declare\: il:dontcopy
  (il:filemap (nil)))
il:stop