(DEFINE-FILE-INFO READTABLE "XCL" PACKAGE (DEFPACKAGE "HASH-FILE" (USE "LISP" "XCL") (IMPORT WITH-READER-ENVIRONMENT MAKE-READER-ENVIRONMENT FIND-READTABLE UNINTERRUPTABLY WITH.MONITOR CREATE.MONITORLOCK MONITORLOCK))) (IL:FILECREATED "11-Jun-90 16:24:56" IL:|{DSK}local>lde>lispcore>library>HASH-FILE.;2| 31647 IL:|changes| IL:|to:| (IL:VARS IL:HASH-FILECOMS) IL:|previous| IL:|date:| " 1-Mar-88 14:55:31" IL:|{DSK}local>lde>lispcore>library>HASH-FILE.;1|) ; Copyright (c) 1987, 1988, 1990 by Venue & Xerox Corporation. All rights reserved. (IL:PRETTYCOMPRINT IL:HASH-FILECOMS) (IL:RPAQQ IL:HASH-FILECOMS ((IL:P (PROVIDE "HASH-FILE") (EXPORT '(MAKE-HASH-FILE OPEN-HASH-FILE CLOSE-HASH-FILE COPY-HASH-FILE MAP-HASH-FILE GET-HASH-FILE REM-HASH-FILE HASH-FILE-P HASH-FILE-COUNT HASH-FILE) "HASH-FILE")) (IL:STRUCTURES HASH-FILE) (IL:FUNCTIONS %PRINT-HASH-FILE) (IL:VARIABLES BITS-PER-BYTE BYTES-PER-POINTER SIZE-POSITION COUNT-POSITION TABLE-POSITION THE-NULL-POINTER) (IL:COMS (IL:* IL:|;;;| "public code") (IL:FUNCTIONS MAKE-HASH-FILE OPEN-HASH-FILE CLOSE-HASH-FILE COPY-HASH-FILE MAP-HASH-FILE GET-HASH-FILE PUT-HASH-FILE REM-HASH-FILE) (IL:SETFS GET-HASH-FILE) (IL:VARIABLES *DELETE-OLD-VERSION-ON-REHASH* *REHASH-SIZE* *REHASH-THRESHOLD*) (IL:* IL:|;;;| "internal code") (IL:FUNCTIONS REHASH? REHASH KEY->TABLE-POINTER ADD-ENTRY ENSURE-STREAM-IS-OPEN NEXT-PRIME WRITE-SIZE READ-SIZE WRITE-COUNT READ-COUNT WRITE-POINTER READ-POINTER NULL-POINTER?) (IL:* IL:|;;| "conveniences") (IL:FUNCTIONS HISTOGRAM CONVERT)) (IL:COMS (IL:* IL:|;;;| "default user code") (IL:FUNCTIONS HASH-OBJECT HASH-OBJECT-INTERNAL COMBINE) (IL:VARIABLES *HASH-DEPTH*) (IL:FUNCTIONS DEFAULT-READ-FN DEFAULT-PRINT-FN) (IL:VARIABLES *READER-ENVIRONMENT*)) (IL:PROP (IL:FILETYPE IL:MAKEFILE-ENVIRONMENT) IL:HASH-FILE))) (PROVIDE "HASH-FILE") (EXPORT '(MAKE-HASH-FILE OPEN-HASH-FILE CLOSE-HASH-FILE COPY-HASH-FILE MAP-HASH-FILE GET-HASH-FILE REM-HASH-FILE HASH-FILE-P HASH-FILE-COUNT HASH-FILE) "HASH-FILE") (DEFSTRUCT (HASH-FILE (:COPIER COPY-HASH-FILE-INTERNAL) (:CONSTRUCTOR MAKE-HASH-FILE-INTERNAL) (:PRINT-FUNCTION %PRINT-HASH-FILE)) "Like a hash-table but on a file instead of in memory" (STREAM NIL :TYPE STREAM) (IL:* IL:|;;| "open stream on the backing file") (DIRECTION :INPUT :TYPE (MEMBER :INPUT :IO)) (IL:* IL:|;;| "the direction that stream is open in") (MONITOR (CREATE.MONITORLOCK "HASH-FILE") :TYPE MONITORLOCK) (IL:* IL:|;;| "should always be obtained before changing STREAM's position") (SIZE NIL :TYPE INTEGER) (IL:* IL:|;;| "size of the table -- determines the range for key hashing") (COUNT 0 :TYPE :INTEGER) (IL:* IL:|;;| "number of entries currently in the hash file") (REHASH-SIZE *REHASH-SIZE* :TYPE FLOAT) (IL:* IL:|;;| "factor to increase size by when re-hashing") (REHASH-THRESHOLD *REHASH-THRESHOLD* :TYPE FLOAT) (IL:* IL:|;;| "rehash when (= ENTRIES (* SIZE REHASH-THRESHOLD)") (KEY-PRINT-FN 'DEFAULT-PRINT-FN :TYPE FUNCTION) (IL:* IL:|;;| "called with KEY and STREAM to write keys on the file") (KEY-READ-FN 'DEFAULT-READ-FN :TYPE FUNCTION) (IL:* IL:|;;| "called with STREAM to read a key from the file") (KEY-HASH-FN 'HASH-OBJECT :TYPE FUNCTION) (IL:* IL:|;;| "called with KEY and SIZE to obtain an integer in {0 .. SIZE-1}") (KEY-COMPARE-FN 'EQUAL :TYPE FUNCTION) (IL:* IL:|;;| "called with two keys with same hash value to resolve collisions") (VALUE-PRINT-FN 'DEFAULT-PRINT-FN :TYPE FUNCTION) (IL:* IL:|;;| "called with VALUE and STREAM to print values on file") (VALUE-READ-FN 'DEFAULT-READ-FN :TYPE FUNCTION) (IL:* IL:|;;| "called with STREAM to read a value from the file") ) (DEFUN %PRINT-HASH-FILE (HASH-FILE STREAM DEPTH) (FORMAT STREAM "#" (LET* ((STREAM (HASH-FILE-STREAM HASH-FILE)) (NAMESTRING (NAMESTRING (PATHNAME STREAM)))) (IF NAMESTRING NAMESTRING STREAM)))) (DEFCONSTANT BITS-PER-BYTE 8) (DEFCONSTANT BYTES-PER-POINTER 4) (DEFCONSTANT SIZE-POSITION (* BYTES-PER-POINTER 0)) (DEFCONSTANT COUNT-POSITION (* BYTES-PER-POINTER 1)) (DEFCONSTANT TABLE-POSITION (* BYTES-PER-POINTER 2)) (DEFCONSTANT THE-NULL-POINTER 0) (IL:* IL:|;;;| "public code") (DEFUN MAKE-HASH-FILE (IL:* IL:|;;| "MAKE-HASH-TABLE equivalent for hash files") (IL:* IL:|;;| "creates and returns a new hash file.") (FILE-NAME SIZE &KEY (REHASH-SIZE *REHASH-SIZE*) (REHASH-THRESHOLD *REHASH-THRESHOLD*) (KEY-PRINT-FN 'DEFAULT-PRINT-FN) (KEY-READ-FN 'DEFAULT-READ-FN) (KEY-COMPARE-FN 'EQUAL) (KEY-HASH-FN 'HASH-OBJECT) (VALUE-PRINT-FN 'DEFAULT-PRINT-FN) (VALUE-READ-FN 'DEFAULT-READ-FN)) (LET ((STREAM (OPEN FILE-NAME :DIRECTION :IO :IF-EXISTS :NEW-VERSION :ELEMENT-TYPE `(UNSIGNED-BYTE ,BITS-PER-BYTE))) (REAL-SIZE (NEXT-PRIME SIZE))) (IL:* IL:|;;| "write the size & entries") (WRITE-SIZE REAL-SIZE STREAM) (WRITE-COUNT 0 STREAM) (IL:* IL:|;;| "initialize table -- fill it with null pointers") (DOTIMES (N REAL-SIZE) (WRITE-POINTER THE-NULL-POINTER STREAM)) (IL:* IL:|;;| "make & return a HASH-FILE structure") (MAKE-HASH-FILE-INTERNAL :STREAM STREAM :DIRECTION :IO :SIZE REAL-SIZE :COUNT 0 :REHASH-SIZE REHASH-SIZE :REHASH-THRESHOLD REHASH-THRESHOLD :KEY-PRINT-FN KEY-PRINT-FN :KEY-READ-FN KEY-READ-FN :KEY-COMPARE-FN KEY-COMPARE-FN :KEY-HASH-FN KEY-HASH-FN :VALUE-PRINT-FN VALUE-PRINT-FN :VALUE-READ-FN VALUE-READ-FN))) (DEFUN OPEN-HASH-FILE (IL:* IL:|;;| "open an existing hash file") (FILE-NAME &KEY (DIRECTION :INPUT) (REHASH-SIZE *REHASH-SIZE*) (REHASH-THRESHOLD *REHASH-THRESHOLD*) (KEY-PRINT-FN 'DEFAULT-PRINT-FN) (KEY-READ-FN 'DEFAULT-READ-FN) (KEY-COMPARE-FN 'EQUAL) (KEY-HASH-FN 'HASH-OBJECT) (VALUE-PRINT-FN 'DEFAULT-PRINT-FN) (VALUE-READ-FN 'DEFAULT-READ-FN)) (CASE DIRECTION ((:INPUT :IO) ) (OTHERWISE (ERROR "~S illegal arg. Must be :INPUT or :IO" DIRECTION))) (LET ((STREAM (OPEN FILE-NAME :DIRECTION DIRECTION :IF-EXISTS :OVERWRITE :ELEMENT-TYPE `(UNSIGNED-BYTE ,BITS-PER-BYTE)))) (IL:* IL:|;;| "make & return a HASH-FILE structure") (MAKE-HASH-FILE-INTERNAL :STREAM STREAM :DIRECTION DIRECTION :SIZE (READ-SIZE STREAM) :COUNT (READ-COUNT STREAM) :REHASH-SIZE REHASH-SIZE :REHASH-THRESHOLD REHASH-THRESHOLD :KEY-PRINT-FN KEY-PRINT-FN :KEY-READ-FN KEY-READ-FN :KEY-COMPARE-FN KEY-COMPARE-FN :KEY-HASH-FN KEY-HASH-FN :VALUE-PRINT-FN VALUE-PRINT-FN :VALUE-READ-FN VALUE-READ-FN))) (DEFUN CLOSE-HASH-FILE (HASH-FILE &KEY ABORT) (IL:* IL:|;;| "close the stream") (WITH.MONITOR (HASH-FILE-MONITOR HASH-FILE) (LET ((STREAM (HASH-FILE-STREAM HASH-FILE))) (CLOSE STREAM :ABORT ABORT) (PATHNAME STREAM)))) (DEFUN COPY-HASH-FILE (OLD-HASH-FILE NEW-FILE-NAME &OPTIONAL (NEW-SIZE NIL NEW-SIZE-SPECIFIED?)) (IL:* IL:|;;| "make a new hashfile in NEW-FILE-NAME with the same contents as OLD-HASH-FILE. this will reclaim space lost in OLD-HASH-FILE. also used by REHASH. ") (LET ((NEW-HASH-FILE (MAKE-HASH-FILE NEW-FILE-NAME (IF NEW-SIZE-SPECIFIED? NEW-SIZE (IL:* IL:|;;|  "default NEW-SIZE to the size of OLD-HASH-FILE") (HASH-FILE-SIZE OLD-HASH-FILE)) (IL:* IL:|;;| "sure wish common lisp had a \"using\" construct...") :REHASH-SIZE (HASH-FILE-REHASH-SIZE OLD-HASH-FILE) :REHASH-THRESHOLD (HASH-FILE-REHASH-THRESHOLD OLD-HASH-FILE) :KEY-PRINT-FN (HASH-FILE-KEY-PRINT-FN OLD-HASH-FILE) :KEY-READ-FN (HASH-FILE-KEY-READ-FN OLD-HASH-FILE) :KEY-COMPARE-FN (HASH-FILE-KEY-COMPARE-FN OLD-HASH-FILE) :KEY-HASH-FN (HASH-FILE-KEY-HASH-FN OLD-HASH-FILE) :VALUE-PRINT-FN (HASH-FILE-VALUE-PRINT-FN OLD-HASH-FILE) :VALUE-READ-FN (HASH-FILE-VALUE-READ-FN OLD-HASH-FILE)))) (MAP-HASH-FILE #'(LAMBDA (KEY VALUE) (SETF (GET-HASH-FILE KEY NEW-HASH-FILE) VALUE)) OLD-HASH-FILE) (IL:* IL:|;;| "write it out for safety") (CLOSE-HASH-FILE NEW-HASH-FILE) (IL:* IL:|;;| "return the new hash file") NEW-HASH-FILE)) (DEFUN MAP-HASH-FILE (FN HASH-FILE) (IL:* IL:|;;| "calls FN on every KEY & VALUE pair in HASH-FILE") (WITH.MONITOR (HASH-FILE-MONITOR HASH-FILE) (LET* ((STREAM (ENSURE-STREAM-IS-OPEN HASH-FILE)) (SIZE (HASH-FILE-SIZE HASH-FILE)) (LAST-POINTER (+ TABLE-POSITION (* BYTES-PER-POINTER (1- SIZE)))) NEXT-POINTER) (IL:* IL:|;;| "loop over table") (DO ((TABLE-POINTER TABLE-POSITION (+ TABLE-POINTER BYTES-PER-POINTER))) ((> TABLE-POINTER LAST-POINTER)) (IL:* IL:|;;| "loop down bucket") (DO ((POINTER (READ-POINTER STREAM TABLE-POINTER) NEXT-POINTER)) ((NULL-POINTER? POINTER) (IL:* IL:\;  "end of bucket or empty bucket") ) (IL:* IL:|;;| "read & save next pointer") (SETQ NEXT-POINTER (READ-POINTER STREAM POINTER)) (IL:* IL:|;;| "call FN on KEY and VALUE read from file") (FUNCALL FN (FUNCALL (HASH-FILE-KEY-READ-FN HASH-FILE) STREAM) (FUNCALL (HASH-FILE-VALUE-READ-FN HASH-FILE) STREAM))))))) (DEFUN GET-HASH-FILE (KEY HASH-FILE &OPTIONAL (DEFAULT NIL)) (IL:* IL:|;;| "GETHASH for hash files") (IL:* IL:|;;| "returns the value stored under KEY in HASH-FILE, or DEFAULT if there is no value stored. second value is T iff a value was found") (WITH.MONITOR (HASH-FILE-MONITOR HASH-FILE) (LET ((STREAM (ENSURE-STREAM-IS-OPEN HASH-FILE)) NEXT-POINTER) (IL:* IL:|;;| "loop down linked list in bucket") (DO ((POINTER (READ-POINTER STREAM (KEY->TABLE-POINTER KEY HASH-FILE)) NEXT-POINTER)) ((NULL-POINTER? POINTER) (IL:* IL:|;;| "end of bucket (or empty bucket) - we lost") (VALUES DEFAULT NIL)) (IL:* IL:|;;| "read & save next pointer") (SETQ NEXT-POINTER (READ-POINTER STREAM POINTER)) (WHEN (IL:* IL:|;;| "read key from file and compare with KEY ") (FUNCALL (HASH-FILE-KEY-COMPARE-FN HASH-FILE) KEY (FUNCALL (HASH-FILE-KEY-READ-FN HASH-FILE) STREAM)) (IL:* IL:|;;| "they match -- we won!") (RETURN (IL:* IL:|;;| "read & return value") (VALUES (FUNCALL (HASH-FILE-VALUE-READ-FN HASH-FILE) STREAM) T))))))) (DEFUN PUT-HASH-FILE (KEY HASH-FILE VALUE) (IL:* IL:|;;| "SETF method for GET-HASH-FILE") (IL:* IL:|;;| "stores a VALUE under KEY in HASH-FILE") (WITH.MONITOR (HASH-FILE-MONITOR HASH-FILE) (LET ((TABLE-POINTER (KEY->TABLE-POINTER KEY HASH-FILE)) (STREAM (ENSURE-STREAM-IS-OPEN HASH-FILE)) NEXT-POINTER) (IL:* IL:|;;| "loop down bucket") (DO* ((LAST-POINTER TABLE-POINTER POINTER) (IL:* IL:|;;| "LAST-POINTER is location of POINTER") (POINTER (READ-POINTER STREAM TABLE-POINTER) NEXT-POINTER)) ((NULL-POINTER? POINTER) (IL:* IL:|;;| "end of bucket (or empty bucket) - nothing hashed under this key") (IL:* IL:|;;| "time to add a new entry to the hash file ") (COND ((REHASH? HASH-FILE) (IL:* IL:|;;| "pointers are off if we rehashed -- have to start over") (PUT-HASH-FILE KEY HASH-FILE VALUE)) (T (IL:* IL:|;;| "just nconc a new entry onto the end of the bucket") (ADD-ENTRY HASH-FILE KEY VALUE LAST-POINTER THE-NULL-POINTER) (IL:* IL:|;;| "increment and write out the count of objects") (WRITE-COUNT (INCF (HASH-FILE-COUNT HASH-FILE)) STREAM)))) (IL:* IL:|;;| "read & save the pointer to next in bucket") (SETQ NEXT-POINTER (READ-POINTER STREAM POINTER)) (WHEN (IL:* IL:|;;| "read key from file & compare with KEY") (FUNCALL (HASH-FILE-KEY-COMPARE-FN HASH-FILE) KEY (FUNCALL (HASH-FILE-KEY-READ-FN HASH-FILE) STREAM)) (IL:* IL:|;;| "they match - already something hashed under this key") (IL:* IL:|;;| "splice new entry into bucket, old entry out") (ADD-ENTRY HASH-FILE KEY VALUE LAST-POINTER NEXT-POINTER) (RETURN))) (IL:* IL:|;;| "return VALUE") VALUE))) (DEFUN REM-HASH-FILE (KEY HASH-FILE) (IL:* IL:|;;| "REMHASH for hash files") (IL:* IL:|;;|  "removes the entry (if any) for KEY from HASH-FILE. returns T if there was one to remove. ") (WITH.MONITOR (HASH-FILE-MONITOR HASH-FILE) (LET ((TABLE-POINTER (KEY->TABLE-POINTER KEY HASH-FILE)) (STREAM (ENSURE-STREAM-IS-OPEN HASH-FILE)) NEXT-POINTER) (IL:* IL:|;;| "loop down bucket") (DO* ((LAST-POINTER TABLE-POINTER POINTER) (IL:* IL:|;;| "LAST-POINTER is location of POINTER") (POINTER (READ-POINTER STREAM TABLE-POINTER) NEXT-POINTER)) ((NULL-POINTER? POINTER) (IL:* IL:|;;| "end of bucket (or empty bucket) - nothing hashed under this key") 'NIL) (IL:* IL:|;;| "read & save the pointer to next in bucket") (SETQ NEXT-POINTER (READ-POINTER STREAM POINTER)) (WHEN (IL:* IL:|;;| "read key from file & compare with KEY") (FUNCALL (HASH-FILE-KEY-COMPARE-FN HASH-FILE) KEY (FUNCALL (HASH-FILE-KEY-READ-FN HASH-FILE) STREAM)) (IL:* IL:|;;| "they match") (IL:* IL:|;;| "smash NEXT-POINTER into LAST-POINTER") (WRITE-POINTER NEXT-POINTER STREAM LAST-POINTER) (IL:* IL:|;;| "decrement the count of entries in HASH-FILE") (WRITE-COUNT (DECF (HASH-FILE-COUNT HASH-FILE)) STREAM) (RETURN 'T)))))) (DEFSETF GET-HASH-FILE PUT-HASH-FILE) (DEFVAR *DELETE-OLD-VERSION-ON-REHASH* NIL "if non-NIL then delete the old version of a hash file when rehashing") (DEFVAR *REHASH-SIZE* 2.0 "default REHASH-SIZE for hash files") (DEFVAR *REHASH-THRESHOLD* 0.875 "default REHASH-THRESHOLD for hash files") (IL:* IL:|;;;| "internal code") (DEFUN REHASH? (HASH-FILE) (IL:* IL:|;;| "check if it's time to rehash HASH-FILE. if it is, then do so and return non-NIL") (WHEN (>= (1+ (HASH-FILE-COUNT HASH-FILE)) (* (HASH-FILE-SIZE HASH-FILE) (HASH-FILE-REHASH-THRESHOLD HASH-FILE))) (REHASH HASH-FILE (ROUND (* (HASH-FILE-SIZE HASH-FILE) (HASH-FILE-REHASH-SIZE HASH-FILE)))) T)) (DEFUN REHASH (HASH-FILE NEW-SIZE) (IL:* IL:|;;| "caution: assumes we're under hash file monitor") (LET* ((OLD-PATHNAME (PATHNAME (HASH-FILE-STREAM HASH-FILE))) (TEMP-HASH-FILE (COPY-HASH-FILE HASH-FILE (MAKE-PATHNAME :VERSION :NEWEST :DEFAULTS (PATHNAME OLD-PATHNAME)) NEW-SIZE))) (IL:* IL:|;;| "close the old stream (before we lose pointer to it)") (CLOSE-HASH-FILE HASH-FILE) (IL:* IL:|;;| "smash TEMP-HASH-FILE into HASH-FILE") (UNINTERRUPTABLY (SETF (HASH-FILE-SIZE HASH-FILE) (IL:* IL:\;  "note: probably not the same as NEW-SIZE") (HASH-FILE-SIZE TEMP-HASH-FILE)) (SETF (HASH-FILE-COUNT HASH-FILE) (HASH-FILE-COUNT TEMP-HASH-FILE)) (SETF (HASH-FILE-STREAM HASH-FILE) (HASH-FILE-STREAM TEMP-HASH-FILE))) (IL:* IL:|;;| "our caller [PUT-HASH-FILE] expects the stream to be open") (ENSURE-STREAM-IS-OPEN HASH-FILE) (IF *DELETE-OLD-VERSION-ON-REHASH* (DELETE-FILE OLD-PATHNAME)) (IL:* IL:|;;| "return the hash file") HASH-FILE)) (DEFMACRO KEY->TABLE-POINTER (KEY HASH-FILE) (IL:* IL:|;;| "return the file position for the head of the bucket which key hashes into. this is the guy who does the hashing. ") (IL:* IL:|;;| "caution: HASH-FILE is evaluated twice") `(+ TABLE-POSITION (* BYTES-PER-POINTER (FUNCALL (HASH-FILE-KEY-HASH-FN ,HASH-FILE) ,KEY (HASH-FILE-SIZE ,HASH-FILE))))) (DEFUN ADD-ENTRY (HASH-FILE KEY VALUE LAST-POINTER LINK-POINTER) (IL:* IL:|;;| "write an entry at end of file, putting a pointer to it in LAST-POINTER and make it point to LINK-POINTER as next in bucket.") (IL:* IL:|;;| "caution: we presume we've got the hash-file-monitor.") (LET* ((STREAM (HASH-FILE-STREAM HASH-FILE)) (EOF-POINTER (FILE-LENGTH STREAM))) (IL:* IL:|;;| "first overwrite LAST-POINTER with a pointer to EOF ") (WRITE-POINTER EOF-POINTER STREAM LAST-POINTER) (IL:* IL:|;;| "write link to next bucket ") (WRITE-POINTER LINK-POINTER STREAM EOF-POINTER) (IL:* IL:|;;| "write the key") (FUNCALL (HASH-FILE-KEY-PRINT-FN HASH-FILE) KEY STREAM) (IL:* IL:|;;| "write the value") (FUNCALL (HASH-FILE-VALUE-PRINT-FN HASH-FILE) VALUE STREAM) (IL:* IL:|;;| "return value") VALUE)) (DEFUN ENSURE-STREAM-IS-OPEN (HASH-FILE) (IL:* IL:|;;| "makes sure HASH-FILE's stream is open") (IL:* IL:|;;| "caution: assumes we're under hash file monitor") (LET ((STREAM (HASH-FILE-STREAM HASH-FILE))) (IF (OPEN-STREAM-P STREAM) STREAM (SETF (HASH-FILE-STREAM HASH-FILE) (OPEN STREAM :DIRECTION (HASH-FILE-DIRECTION HASH-FILE) :IF-EXISTS :OVERWRITE))))) (DEFUN NEXT-PRIME (N) (IL:* IL:|;;| "return the next prime number greater than N") (IL:* IL:|;;| "algorithm stolen from CDL's FIND1STPRIME in old HASH library") (LET (FOUND?) (DO ((P (LOGIOR N 1) (+ P 2))) ((DO* ((I 3 (+ I 2))) ((OR (AND (< I P) (ZEROP (REM P I))) (SETQ FOUND? (< P (* I I)))) FOUND?)) P)))) (DEFUN WRITE-SIZE (SIZE STREAM) (IL:* IL:|;;| "write SIZE to file as a pointer sized number") (WRITE-POINTER SIZE STREAM SIZE-POSITION)) (DEFUN READ-SIZE (STREAM) (IL:* IL:|;;| "read size from file as written by WRITE-SIZE") (READ-POINTER STREAM SIZE-POSITION)) (DEFUN WRITE-COUNT (COUNT STREAM) (IL:* IL:|;;| "write COUNT to file as a pointer sized number") (WRITE-POINTER COUNT STREAM COUNT-POSITION)) (DEFUN READ-COUNT (STREAM) (IL:* IL:|;;| "read count as written by WRITE-COUNT") (READ-POINTER STREAM COUNT-POSITION)) (DEFUN WRITE-POINTER (POINTER STREAM &OPTIONAL POSITION) (IL:* IL:|;;| "write POINTER (a non-negative integer) as BYTES-PER-POINTER bytes on STREAM s.t. READ-POINTER can reconstruct it. if POSITION is specified then set STREAM's file position to it first.") (WHEN (> (INTEGER-LENGTH POINTER) (* BYTES-PER-POINTER BITS-PER-BYTE)) (ERROR "~S : pointer too large" POINTER)) (WHEN POSITION (FILE-POSITION STREAM POSITION)) (DOTIMES (N BYTES-PER-POINTER) (WRITE-BYTE (LDB (BYTE BITS-PER-BYTE (* N BITS-PER-BYTE)) POINTER) STREAM)) (IL:* IL:|;;| "return POINTER") POINTER) (DEFUN READ-POINTER (STREAM &OPTIONAL POSITION) (IL:* IL:|;;| "read from STREAM a positive integer written by WRITE-POINTER. if POSITION is specified the file position will be set to it first.") (IL:* IL:|;;| "read BYTES-PER-POINTER bytes from stream and return them as an integer. this is the inverse of WRITE-P ") (WHEN POSITION (FILE-POSITION STREAM POSITION)) (LET ((VALUE 0) BYTE) (DOTIMES (N BYTES-PER-POINTER) (SETQ BYTE (READ-BYTE STREAM)) (WHEN (NOT (ZEROP BYTE)) (IL:* IL:|;;| "optimization: DPB is really slow w/ high bytes") (SETQ VALUE (DPB BYTE (BYTE BITS-PER-BYTE (* N BITS-PER-BYTE)) VALUE)))) VALUE)) (DEFMACRO NULL-POINTER? (POINTER) `(EQL ,POINTER THE-NULL-POINTER)) (IL:* IL:|;;| "conveniences") (DEFUN HISTOGRAM (HASH-FILE) (IL:* IL:|;;| "return an ALIST of bucket depths dotted with number of occurences") (WITH.MONITOR (HASH-FILE-MONITOR HASH-FILE) (LET* ((STREAM (ENSURE-STREAM-IS-OPEN HASH-FILE)) (SIZE (HASH-FILE-SIZE HASH-FILE)) (LAST-POINTER (+ TABLE-POSITION (* BYTES-PER-POINTER (1- SIZE)))) NEXT-POINTER RESULT) (IL:* IL:|;;| "loop over table") (DO ((TABLE-POINTER TABLE-POSITION (+ TABLE-POINTER BYTES-PER-POINTER))) ((> TABLE-POINTER LAST-POINTER)) (IL:* IL:|;;| "loop down bucket") (DO ((POINTER (READ-POINTER STREAM TABLE-POINTER) NEXT-POINTER) (BUCKET-LENGTH 0 (1+ BUCKET-LENGTH))) ((NULL-POINTER? POINTER) (IL:* IL:|;;| "end of bucket or empty bucket") (IL:* IL:|;;| "increment count for buckets of this length") (INCF (CDR (OR (ASSOC BUCKET-LENGTH RESULT) (CAR (PUSH (CONS BUCKET-LENGTH 0) RESULT)))))) (SETQ NEXT-POINTER (READ-POINTER STREAM POINTER)))) (SORT RESULT #'(LAMBDA (PAIR-1 PAIR-2) (< (CAR PAIR-1) (CAR PAIR-2))))))) (DEFUN CONVERT (IL-HASH-FILE CL-HASH-FILE) "convert a HASH hash file into a HASH-FILE hash file" (IL:* IL:|;;| "first make sure HASH is loaded") (IL:FILESLOAD (IL:SYSLOAD IL:FROM IL:LISPUSERS) HASH) (LET* ((OLD-HASH-FILE (IL:OPENHASHFILE IL-HASH-FILE)) (NEW-HASH-FILE (MAKE-HASH-FILE CL-HASH-FILE (IL:HASHFILEPROP OLD-HASH-FILE 'IL:SIZE))) (ABORT 'T)) (UNWIND-PROTECT (PROGN (IL:MAPHASHFILE OLD-HASH-FILE #'(LAMBDA (KEY VALUE) (PUT-HASH-FILE KEY NEW-HASH-FILE VALUE))) (SETQ ABORT 'NIL)) (IL:CLOSEHASHFILE OLD-HASH-FILE) (CLOSE-HASH-FILE NEW-HASH-FILE :ABORT ABORT)))) (IL:* IL:|;;;| "default user code") (DEFUN HASH-OBJECT (OBJECT RANGE) (IL:* IL:|;;;| "return an integer between 0 and (1- RANGE), inclusive") (IL:* IL:|;;;| "objects which are EQUAL will return the same integer") (1- (HASH-OBJECT-INTERNAL OBJECT RANGE 0))) (DEFUN HASH-OBJECT-INTERNAL (OBJECT RANGE DEPTH) (IL:* IL:|;;| "recursively descend OBJECT, combining characters & integers at leaves with multiplication modulo RANGE. never descend more than *HASH-DEPTH* into a structure.") (IL:* IL:|;;| "return an integer between 1 and RANGE, inclusive") (IF (EQL DEPTH *HASH-DEPTH*) 1 (TYPECASE OBJECT (STRING (LET ((VALUE 1) (LENGTH (LENGTH OBJECT))) (DOTIMES (N (MIN LENGTH (- *HASH-DEPTH* DEPTH)) (COMBINE RANGE VALUE LENGTH)) (SETF VALUE (COMBINE RANGE VALUE (CHAR-CODE (CHAR OBJECT N))))))) (SYMBOL (IL:* IL:|;;| "combine hash values of name and package name") (COMBINE RANGE (HASH-OBJECT-INTERNAL (LET ((PKG (SYMBOL-PACKAGE OBJECT))) (AND PKG (PACKAGE-NAME PKG))) RANGE (1+ DEPTH)) (HASH-OBJECT-INTERNAL (SYMBOL-NAME OBJECT) RANGE (1+ DEPTH)))) (CONS (IL:* IL:|;;| "combine hash values of CAR and CDR") (COMBINE RANGE (HASH-OBJECT-INTERNAL (CAR OBJECT) RANGE (1+ DEPTH)) (HASH-OBJECT-INTERNAL (CDR OBJECT) RANGE (1+ DEPTH)))) (NUMBER (TYPECASE OBJECT (INTEGER (COMBINE RANGE (ABS OBJECT))) (FLOAT (MULTIPLE-VALUE-BIND (SIG EXPON) (INTEGER-DECODE-FLOAT OBJECT) (COMBINE RANGE SIG (ABS EXPON)))) (RATIO (COMBINE RANGE (ABS (NUMERATOR OBJECT)) (DENOMINATOR OBJECT))) (COMPLEX (COMBINE RANGE (HASH-OBJECT-INTERNAL (REALPART OBJECT) RANGE (1+ DEPTH)) (HASH-OBJECT-INTERNAL (IMAGPART OBJECT) RANGE (1+ DEPTH)))))) (CHARACTER (COMBINE RANGE (CHAR-CODE OBJECT))) (PATHNAME (HASH-OBJECT-INTERNAL (NAMESTRING OBJECT) RANGE (1+ DEPTH))) (BIT-VECTOR (LET ((VALUE 1) (LENGTH (LENGTH OBJECT))) (DOTIMES (N (MIN LENGTH (- *HASH-DEPTH* DEPTH)) (COMBINE RANGE VALUE LENGTH)) (SETF VALUE (COMBINE RANGE VALUE (IF (ZEROP (BIT OBJECT N)) 0 (EXPT 2 N))))))) (T (IL:* IL:|;;| "can't dependably read/print other objects") (ERROR "Can't hash a(n) ~S" (TYPE-OF OBJECT)))))) (DEFMACRO COMBINE (RANGE &REST INTEGERS) (IL:* IL:|;;;| "combine non-negative integers returning an integer between 1 and RANGE inclusive (zeros are bad when combining with multiplication). we don't do the obvious (1+ (mod (* . integers) range)) to avoid making bignums. ") (IL:* IL:|;;;| " caution: RANGE may be evaluated many times.") `(1+ (MOD ,(IF (ENDP (REST INTEGERS)) (FIRST INTEGERS) `(* ,(FIRST INTEGERS) (COMBINE ,RANGE ,@(REST INTEGERS)))) ,RANGE))) (DEFVAR *HASH-DEPTH* 17) (DEFUN DEFAULT-READ-FN (STREAM) (IL:* IL:|;;| "default reader for hash files") (WITH-READER-ENVIRONMENT *READER-ENVIRONMENT* (READ STREAM))) (DEFUN DEFAULT-PRINT-FN (OBJECT STREAM) (IL:* IL:|;;| "default printer for hash files") (WITH-READER-ENVIRONMENT *READER-ENVIRONMENT* (LET ((*PRINT-PRETTY* 'NIL)) (PRINT OBJECT STREAM))) OBJECT) (DEFVAR *READER-ENVIRONMENT* (MAKE-READER-ENVIRONMENT (FIND-PACKAGE "XCL") (FIND-READTABLE "XCL") 10)) (IL:PUTPROPS IL:HASH-FILE IL:FILETYPE :COMPILE-FILE) (IL:PUTPROPS IL:HASH-FILE IL:MAKEFILE-ENVIRONMENT (:READTABLE "XCL" :PACKAGE (DEFPACKAGE "HASH-FILE" (:USE "LISP" "XCL") (:IMPORT WITH-READER-ENVIRONMENT MAKE-READER-ENVIRONMENT FIND-READTABLE UNINTERRUPTABLY WITH.MONITOR CREATE.MONITORLOCK MONITORLOCK)))) (IL:PUTPROPS IL:HASH-FILE IL:COPYRIGHT ("Venue & Xerox Corporation" 1987 1988 1990)) (IL:DECLARE\: IL:DONTCOPY (IL:FILEMAP (NIL))) IL:STOP