DIRECTORY Ascii USING [Lower], CountedVM USING [Allocate, Handle], IO USING [STREAM, UnsafeGetBlock, UnsafePutBlock], Basics USING [BITXOR, bytesPerWord], RefText USING [TrustTextAsRope], Rope USING [ROPE, QFetch], BitTableLookup USING [Table, TableContents, FailureTypes, RowPrime, RowContents, RowPointer]; BitTableLookupImpl: CEDAR PROGRAM IMPORTS Ascii, IO, Basics, Rope, CountedVM, RefText EXPORTS BitTableLookup SHARES Rope = BEGIN ROPE: TYPE = Rope.ROPE; STREAM: TYPE = IO.STREAM; Table: TYPE = BitTableLookup.Table; TableContents: TYPE = BitTableLookup.TableContents; RowContents: TYPE = BitTableLookup.RowContents; RowPointer: TYPE = BitTableLookup.RowPointer; BPW: INT = Basics.bytesPerWord; BPI: INT = Basics.bytesPerWord * INT.SIZE; BPC: INT = Basics.bytesPerWord * CARDINAL.SIZE; maxTableSize: INT _ 268238848; IAmNotACrook: INT = 2789127; Failed: PUBLIC ERROR[why: BitTableLookup.FailureTypes] = CODE; BitToRow: PROC[bitSize: INT] RETURNS [rowSize: INT] = INLINE { rowSize _ (bitSize+(RowContents.SIZE*BPW*8) - 1) / (RowContents.SIZE*BPW*8); }; Create: PUBLIC PROC [ size: INT ] RETURNS [ table: Table ] = { }; Read: PUBLIC PROC [ in: STREAM ] RETURNS [ table: Table ] = TRUSTED { doWeBelieveHim: INT; rowSize: CARDINAL; bytesPerRow: INT; tableBase: LONG POINTER; IF IO.UnsafeGetBlock[in, [LOOPHOLE[LONG[@doWeBelieveHim]], 0, BPI]] # BPI THEN ERROR Failed[BadTable]; IF doWeBelieveHim # IAmNotACrook THEN ERROR Failed[BadTable]; IF IO.UnsafeGetBlock[ in, [LOOPHOLE[LONG[@rowSize]], 0, BPC]] # BPC THEN ERROR Failed[BadTable]; table _ NEW[TableContents[rowSize]]; table.vmHandle _ CountedVM.Allocate[TableContents.SIZE+rowSize*RowPointer.SIZE+rowSize*RowContents.SIZE]; tableBase _ LOOPHOLE[table.vmHandle.pointer + TableContents.SIZE + rowSize*RowPointer.SIZE]; bytesPerRow _ RowContents.SIZE * BPW; FOR i: CARDINAL IN [0..rowSize) DO table.bits[i] _ tableBase+i*RowContents.SIZE; IF IO.UnsafeGetBlock[in, [LOOPHOLE[table.bits[i]], 0, bytesPerRow]] # bytesPerRow THEN Failed[BadTable]; ENDLOOP; IF IO.UnsafeGetBlock[ in, [LOOPHOLE[LONG[@doWeBelieveHim]], 0, BPI]] # BPI THEN ERROR Failed[BadTable]; IF doWeBelieveHim # IAmNotACrook THEN ERROR Failed[BadTable]; }; Write: PUBLIC PROC [ table: Table, out: STREAM ] = TRUSTED { validationStamp: INT _ IAmNotACrook; s: INT; rowSize: CARDINAL _ table.rowSize; IO.UnsafePutBlock[ out, [LOOPHOLE[LONG[@validationStamp]], 0, BPI]]; IO.UnsafePutBlock[ out, [LOOPHOLE[LONG[@rowSize]], 0, BPC]]; s _ RowContents.SIZE * BPW; FOR i: CARDINAL IN [0..rowSize) DO IO.UnsafePutBlock[out, [LOOPHOLE[table.bits[i]], 0, s]]; ENDLOOP; IO.UnsafePutBlock[ out, [LOOPHOLE[LONG[@validationStamp]], 0, BPI]]; }; Insert: PUBLIC PROC [table: Table, word: REF TEXT] = { [] _ InsertOrLookup[ table, word, FALSE ]; }; Lookup: PUBLIC PROC [table: Table, word: REF TEXT] RETURNS [member: BOOLEAN] = { member _ InsertOrLookup[ table, word, TRUE ]; }; CharTable: ARRAY [0..26) OF CARDINAL = [ LOOPHOLE[-211], LOOPHOLE[-332], 449, 588, 7102, LOOPHOLE[-854], LOOPHOLE[-991], LOOPHOLE[-1118], 1278, 1433, LOOPHOLE[-1568], LOOPHOLE[-1709], 1874, 2027, 2180, LOOPHOLE[-2341], LOOPHOLE[-2478], LOOPHOLE[-2671], 2798, 2963, LOOPHOLE[-3164], LOOPHOLE[-3320], LOOPHOLE[-3469], 3624, 3793, 3944 ]; NumberHashFunctions: PUBLIC PROC [] RETURNS [numberHashFunctions: INT] = { numberHashFunctions _ 7; }; InsertOrLookup: PROC [table: Table, word: REF TEXT, lookup: BOOLEAN ] RETURNS [member: BOOLEAN _ FALSE] = TRUSTED { M: PROC[n, m: CARDINAL] RETURNS [r: CARDINAL] = TRUSTED INLINE { r _ n - m; }; P: PROC[n, m: CARDINAL] RETURNS [r: CARDINAL] = TRUSTED INLINE { r _ n - m; }; R: PROC[n, m: CARDINAL] RETURNS [r: CARDINAL] = TRUSTED INLINE { r _ m - n; }; X: PROC[n, m: CARDINAL] RETURNS [r: CARDINAL] = TRUSTED INLINE { r _ Basics.BITXOR[n,m]; }; R1: PROC[n: CARDINAL] RETURNS [r: CARDINAL] = TRUSTED INLINE { IF n >= 100000B THEN r _ n*2+1 ELSE r _ n*2; }; R3: PROC[n: CARDINAL] RETURNS [r: CARDINAL] = TRUSTED INLINE { r _ n*8+n/20000B; }; R5: PROC[n: CARDINAL] RETURNS [r: CARDINAL] = TRUSTED INLINE { r _ n*32+n/4000B; }; Word: PROC[n: CARDINAL] RETURNS [wordOffset: CARDINAL] = TRUSTED INLINE { wordOffset _ n MOD table.rowSize; }; Bit: PROC[n: CARDINAL] RETURNS [bitNum: CARDINAL] = TRUSTED INLINE { bitNum _ n MOD BitTableLookup.RowPrime; }; char: CHAR; c, p: CARDINAL; hash1, hash2, hash3, hash4, hash5, hash6, hash7: CARDINAL; x1, x2, x3, x4, x5, x6, x7: CARDINAL; hash1 _ LOOPHOLE[953]; hash2 _ LOOPHOLE[-1667]; hash3 _ LOOPHOLE[2441]; hash4 _ LOOPHOLE[-3271]; hash5 _ LOOPHOLE[4079]; hash6 _ LOOPHOLE[-4951]; hash7 _ LOOPHOLE[5807]; p _ 359; FOR j: CARDINAL IN [0..word.length) DO char _ Ascii.Lower[Rope.QFetch[RefText.TrustTextAsRope[word], j]]; IF char IN ['a..'z] THEN c _ CharTable[char - 'a] ELSE c _ char - 0C; p _ p + 1009; x1 _ M[ X[ R5[hash2], c], p]; x2 _ R[ M[ R3[hash3], c], p]; x3 _ X[ P[ R5[hash4], c], p]; x4 _ P[ R[ R3[hash5], c], p]; x5 _ P[ X[ R5[hash6], c], p]; x6 _ M[ M[ R3[hash7], c], p]; x7 _ R[ P[ R5[hash1], c], p]; hash1 _ x1; hash2 _ x2; hash3 _ x3; hash4 _ x4; hash5 _ x5; hash6 _ x6; hash7 _ x7; ENDLOOP; IF lookup THEN { IF ~table.bits[LOOPHOLE[Word[hash1]]][Bit[hash7]] THEN RETURN; IF ~table.bits[LOOPHOLE[Word[hash2]]][Bit[hash6]] THEN RETURN; IF ~table.bits[LOOPHOLE[Word[hash3]]][Bit[hash5]] THEN RETURN; IF ~table.bits[LOOPHOLE[Word[hash4]]][Bit[hash4]] THEN RETURN; IF ~table.bits[LOOPHOLE[Word[hash5]]][Bit[hash3]] THEN RETURN; IF ~table.bits[LOOPHOLE[Word[hash6]]][Bit[hash2]] THEN RETURN; IF ~table.bits[LOOPHOLE[Word[hash7]]][Bit[hash1]] THEN RETURN; } ELSE { table.bits[LOOPHOLE[Word[hash1]]][Bit[hash7]] _ TRUE; table.bits[LOOPHOLE[Word[hash2]]][Bit[hash6]] _ TRUE; table.bits[LOOPHOLE[Word[hash3]]][Bit[hash5]] _ TRUE; table.bits[LOOPHOLE[Word[hash4]]][Bit[hash4]] _ TRUE; table.bits[LOOPHOLE[Word[hash5]]][Bit[hash3]] _ TRUE; table.bits[LOOPHOLE[Word[hash6]]][Bit[hash2]] _ TRUE; table.bits[LOOPHOLE[Word[hash7]]][Bit[hash1]] _ TRUE; }; member _ TRUE; }; END. CHANGE LOG Created by Nix on May 2, 1985 3:05:08 pm PDT , BitTableLookupImpl, IMPORTS, EXPORTS, SHARES, =, BitToRow, Create, Write, Insert, Lookup, NumberHashFunctions, InsertOrLookup, M (local of InsertOrLookup), P (local of InsertOrLookup), R (local of InsertOrLookup), X (local of InsertOrLookup), R1 (local of InsertOrLookup), R3 (local of InsertOrLookup), R5 (local of InsertOrLookup), Word (local of InsertOrLookup), Bit (local of InsertOrLookup), END, CHANGE, Created .BitTableLookupImpl.mesa Copyright c 1985 by Xerox Corporation. All rights reserved. Bob Nix, October 27, 1983 1:23 pm Rick Beach, May 2, 1985 3:05:07 pm PDT Last Edited by: Gasbarro October 11, 1985 5:10:13 pm PDT Current implementation restrictions impose this bound on the maximum size of a bit table. The given size should suffice for tables representing a set of about 14,000,000 words. This magic number is written at the beginning and end of every bit table written to a stream, and its presence is checked whenever a table is read. Creates a table with the given number of bits of capacity. The number of bits should be a prime number that is approximately 20 times the number of words in the given table. ERRORS: Raises Failed[TooLarge] if the table size is greater than 1,048,576 bits. rowSize: INT _ BitToRow[size] ; IF size > maxTableSize THEN ERROR Failed[TooLarge]; table _ NEW[TableContents[rowSize]]; FOR i: INT IN [0..rowSize) DO table.bits[i] _ NEW[RowContents _ ALL[FALSE]]; ENDLOOP; Reads in a Table from the given stream. One had better be there. Writes out a Table to the given stream. Inserts the word into the given table. Looks up the word in the table and returns TRUE iff it thinks that the word is in there. a b c d e f g h i j k l m n o p q r s t u v w x y z Κ O˜codešœ™Kšœ Οmœ1™Kš œ žœžœžœžœ˜LK˜K˜—š  œžœžœ žœžœ˜>Kšœ―™―KšœQ™QKšœ žœ™Kšžœžœžœ™3Kšœžœ™$šžœžœžœž™Kšœžœžœžœ™.Kšžœ™—Kšœ˜—K˜š  œžœžœžœžœžœ˜EKšœA™AKšœžœ˜Kšœ žœ˜Kšœ žœ˜Kšœ žœžœ˜š žœžœžœžœžœžœž˜NKšžœ˜—Kšžœžœžœ˜=š žœžœžœžœžœžœž˜HKšžœ˜—K˜Kšœžœ˜$Kšœ2žœžœžœ˜iKšœ žœ(žœžœ˜\Kšœžœžœ˜%šžœžœžœž˜"Kšœ(žœ˜-šžœžœžœ0ž˜VKšœ˜—Kšžœ˜—š žœžœžœžœžœžœž˜OKšžœ˜—Kšžœžœžœ˜=K˜K˜—š  œžœžœžœžœ˜Kšžœžœ žœ ˜,K˜—š œžœžœžœžœžœžœ˜>Kšœ˜K˜—š œžœžœžœžœžœžœ˜>Kšœ˜K˜—š œžœžœžœžœžœžœ˜IKšœžœ˜!K˜—š œžœžœžœ žœžœžœ˜DKšœ žœ˜'K˜—Kšœžœ˜ Kšœžœ˜Kšœ1žœ˜:Kšœžœ˜&Kšœžœ˜Kšœžœ˜Kšœžœ˜Kšœžœ˜Kšœžœ˜Kšœžœ˜Kšœžœ˜K˜šžœžœžœž˜&KšœB˜Bšžœžœ ž˜K˜—šž˜K˜—Kšœ ˜ Kšœžœžœ˜Kšœžœžœ˜Kšœžœžœ˜Kšœžœžœ˜Kšœžœžœ˜Kšœžœžœ˜Kšœžœžœ˜Kšœ ˜ K˜ K˜ K˜ K˜ K˜ K˜ Kšžœ˜—šžœžœ˜Kšžœ žœžœžœ˜>Kšžœ žœžœžœ˜>Kšžœ žœžœžœ˜>Kšžœ žœžœžœ˜>Kšžœ žœžœžœ˜>Kšžœ žœžœžœ˜>Kšžœ žœžœžœ˜>K˜—šžœ˜Kšœ žœžœ˜5Kšœ žœžœ˜5Kšœ žœžœ˜5Kšœ žœžœ˜5Kšœ žœžœ˜5Kšœ žœžœ˜5Kšœ žœžœ˜5K˜—Kšœ žœ˜K˜——K˜Kšžœ˜K˜Kšžœž˜ K˜Kšœ-Οr‚œ£œ£œ£œ£œ£œ£œ£œ£œ£˜ΟK˜K™—…—&(£