DIRECTORY Ascii USING [Lower], CountedVM USING [Allocate, Handle, Pointer], IO USING [STREAM, UnsafeGetBlock, UnsafePutBlock], Basics USING [BITXOR, bytesPerWord], RefText USING [TrustTextAsRope], Rope USING [ROPE, QFetch], BitTableLookup USING [Table, TableContents, Failed, FailureTypes, RowPrime, RowContents, RowPointer]; BitTableLookupImpl: CEDAR PROGRAM IMPORTS Ascii, IO, Basics, BitTableLookup, Rope, CountedVM, RefText EXPORTS BitTableLookup SHARES Rope = BEGIN ROPE: TYPE = Rope.ROPE; STREAM: TYPE = IO.STREAM; Table: TYPE = BitTableLookup.Table; TableContents: TYPE = BitTableLookup.TableContents; RowContents: TYPE = BitTableLookup.RowContents; RowPointer: TYPE = BitTableLookup.RowPointer; BPW: INT = Basics.bytesPerWord; BPI: INT = Basics.bytesPerWord * INT.SIZE; BPC: INT = Basics.bytesPerWord * CARDINAL.SIZE; maxTableSize: INT _ 268238848; IAmNotACrook: INT = 2789127; Failed: PUBLIC ERROR[why: BitTableLookup.FailureTypes] = CODE; BitToRow: PROC[bitSize: INT] RETURNS [rowSize: INT] = INLINE { rowSize _ (bitSize+(RowContents.SIZE*BPW*8) - 1) / (RowContents.SIZE*BPW*8); }; Create: PUBLIC PROC [ size: INT ] RETURNS [ table: Table ] = { }; Read: PUBLIC PROC [ in: STREAM ] RETURNS [ table: Table ] = TRUSTED { doWeBelieveHim: INT; rowSize: CARDINAL; bytesPerRow: INT; tableBase: LONG POINTER; IF IO.UnsafeGetBlock[in, [LOOPHOLE[LONG[@doWeBelieveHim]], 0, BPI]] # BPI THEN ERROR BitTableLookup.Failed[BadTable]; IF doWeBelieveHim # IAmNotACrook THEN ERROR Failed[BadTable]; IF IO.UnsafeGetBlock[ in, [LOOPHOLE[LONG[@rowSize]], 0, BPC]] # BPC THEN ERROR BitTableLookup.Failed[BadTable]; table _ NEW[TableContents[rowSize]]; table.vmHandle _ CountedVM.Allocate[TableContents.SIZE+rowSize*RowPointer.SIZE+rowSize*RowContents.SIZE]; tableBase _ LOOPHOLE[CountedVM.Pointer[table.vmHandle] + TableContents.SIZE + rowSize*RowPointer.SIZE]; bytesPerRow _ RowContents.SIZE * BPW; FOR i: CARDINAL IN [0..rowSize) DO table.bits[i] _ tableBase+i*RowContents.SIZE; IF IO.UnsafeGetBlock[in, [LOOPHOLE[table.bits[i]], 0, bytesPerRow]] # bytesPerRow THEN BitTableLookup.Failed[BadTable]; ENDLOOP; IF IO.UnsafeGetBlock[ in, [LOOPHOLE[LONG[@doWeBelieveHim]], 0, BPI]] # BPI THEN ERROR BitTableLookup.Failed[BadTable]; IF doWeBelieveHim # IAmNotACrook THEN ERROR BitTableLookup.Failed[BadTable]; }; Write: PUBLIC PROC [ table: Table, out: STREAM ] = TRUSTED { validationStamp: INT _ IAmNotACrook; s: INT; rowSize: CARDINAL _ table.rowSize; IO.UnsafePutBlock[ out, [LOOPHOLE[LONG[@validationStamp]], 0, BPI]]; IO.UnsafePutBlock[ out, [LOOPHOLE[LONG[@rowSize]], 0, BPC]]; s _ RowContents.SIZE * BPW; FOR i: CARDINAL IN [0..rowSize) DO IO.UnsafePutBlock[out, [LOOPHOLE[table.bits[i]], 0, s]]; ENDLOOP; IO.UnsafePutBlock[ out, [LOOPHOLE[LONG[@validationStamp]], 0, BPI]]; }; Insert: PUBLIC PROC [table: Table, word: REF TEXT] = { [] _ InsertOrLookup[ table, word, FALSE ]; }; Lookup: PUBLIC PROC [table: Table, word: REF TEXT] RETURNS [member: BOOLEAN] = { member _ InsertOrLookup[ table, word, TRUE ]; }; CharTable: ARRAY [0..26) OF CARDINAL = [ LOOPHOLE[-211], LOOPHOLE[-332], 449, 588, 7102, LOOPHOLE[-854], LOOPHOLE[-991], LOOPHOLE[-1118], 1278, 1433, LOOPHOLE[-1568], LOOPHOLE[-1709], 1874, 2027, 2180, LOOPHOLE[-2341], LOOPHOLE[-2478], LOOPHOLE[-2671], 2798, 2963, LOOPHOLE[-3164], LOOPHOLE[-3320], LOOPHOLE[-3469], 3624, 3793, 3944 ]; NumberHashFunctions: PUBLIC PROC [] RETURNS [numberHashFunctions: INT] = { numberHashFunctions _ 7; }; InsertOrLookup: PROC [table: Table, word: REF TEXT, lookup: BOOLEAN ] RETURNS [member: BOOLEAN _ FALSE] = TRUSTED { M: PROC[n, m: CARDINAL] RETURNS [r: CARDINAL] = TRUSTED INLINE { r _ n - m; }; P: PROC[n, m: CARDINAL] RETURNS [r: CARDINAL] = TRUSTED INLINE { r _ n - m; }; R: PROC[n, m: CARDINAL] RETURNS [r: CARDINAL] = TRUSTED INLINE { r _ m - n; }; X: PROC[n, m: CARDINAL] RETURNS [r: CARDINAL] = TRUSTED INLINE { r _ Basics.BITXOR[n,m]; }; R1: PROC[n: CARDINAL] RETURNS [r: CARDINAL] = TRUSTED INLINE { IF n >= 100000B THEN r _ n*2+1 ELSE r _ n*2; }; R3: PROC[n: CARDINAL] RETURNS [r: CARDINAL] = TRUSTED INLINE { r _ n*8+n/20000B; }; R5: PROC[n: CARDINAL] RETURNS [r: CARDINAL] = TRUSTED INLINE { r _ n*32+n/4000B; }; Word: PROC[n: CARDINAL] RETURNS [wordOffset: CARDINAL] = TRUSTED INLINE { wordOffset _ n MOD table.rowSize; }; Bit: PROC[n: CARDINAL] RETURNS [bitNum: CARDINAL] = TRUSTED INLINE { bitNum _ n MOD BitTableLookup.RowPrime; }; char: CHAR; c, p: CARDINAL; hash1, hash2, hash3, hash4, hash5, hash6, hash7: CARDINAL; x1, x2, x3, x4, x5, x6, x7: CARDINAL; hash1 _ LOOPHOLE[953]; hash2 _ LOOPHOLE[-1667]; hash3 _ LOOPHOLE[2441]; hash4 _ LOOPHOLE[-3271]; hash5 _ LOOPHOLE[4079]; hash6 _ LOOPHOLE[-4951]; hash7 _ LOOPHOLE[5807]; p _ 359; FOR j: CARDINAL IN [0..word.length) DO char _ Ascii.Lower[Rope.QFetch[RefText.TrustTextAsRope[word], j]]; IF char IN ['a..'z] THEN c _ CharTable[char - 'a] ELSE c _ char - 0C; p _ p + 1009; x1 _ M[ X[ R5[hash2], c], p]; x2 _ R[ M[ R3[hash3], c], p]; x3 _ X[ P[ R5[hash4], c], p]; x4 _ P[ R[ R3[hash5], c], p]; x5 _ P[ X[ R5[hash6], c], p]; x6 _ M[ M[ R3[hash7], c], p]; x7 _ R[ P[ R5[hash1], c], p]; hash1 _ x1; hash2 _ x2; hash3 _ x3; hash4 _ x4; hash5 _ x5; hash6 _ x6; hash7 _ x7; ENDLOOP; IF lookup THEN { IF ~table.bits[LOOPHOLE[Word[hash1]]][Bit[hash7]] THEN RETURN; IF ~table.bits[LOOPHOLE[Word[hash2]]][Bit[hash6]] THEN RETURN; IF ~table.bits[LOOPHOLE[Word[hash3]]][Bit[hash5]] THEN RETURN; IF ~table.bits[LOOPHOLE[Word[hash4]]][Bit[hash4]] THEN RETURN; IF ~table.bits[LOOPHOLE[Word[hash5]]][Bit[hash3]] THEN RETURN; IF ~table.bits[LOOPHOLE[Word[hash6]]][Bit[hash2]] THEN RETURN; IF ~table.bits[LOOPHOLE[Word[hash7]]][Bit[hash1]] THEN RETURN; } ELSE { table.bits[LOOPHOLE[Word[hash1]]][Bit[hash7]] _ TRUE; table.bits[LOOPHOLE[Word[hash2]]][Bit[hash6]] _ TRUE; table.bits[LOOPHOLE[Word[hash3]]][Bit[hash5]] _ TRUE; table.bits[LOOPHOLE[Word[hash4]]][Bit[hash4]] _ TRUE; table.bits[LOOPHOLE[Word[hash5]]][Bit[hash3]] _ TRUE; table.bits[LOOPHOLE[Word[hash6]]][Bit[hash2]] _ TRUE; table.bits[LOOPHOLE[Word[hash7]]][Bit[hash1]] _ TRUE; }; member _ TRUE; }; END. CHANGE LOG Created by Nix on September 14, 1983 5:02 pm ÚBitTableLookupImpl.mesa -- Last Edited by -- Nix on October 27, 1983 1:23 pm Beach, February 13, 1985 10:15:28 pm PST Current implementation restrictions impose this bound on the maximum size of a bit table. The given size should suffice for tables representing a set of about 14,000,000 words. This magic number is written at the beginning and end of every bit table written to a stream, and its presence is checked whenever a table is read. Creates a table with the given number of bits of capacity. The number of bits should be a prime number that is approximately 20 times the number of words in the given table. ERRORS: Raises BitTableLookup.Failed[TooLarge] if the table size is greater than 1,048,576 bits. rowSize: INT _ BitToRow[size] ; IF size > maxTableSize THEN ERROR Failed[TooLarge]; table _ NEW[TableContents[rowSize]]; FOR i: INT IN [0..rowSize) DO table.bits[i] _ NEW[RowContents _ ALL[FALSE]]; ENDLOOP; Reads in a Table from the given stream. One had better be there. Writes out a Table to the given stream. Inserts the word into the given table. Looks up the word in the table and returns TRUE iff it thinks that the word is in there. a b c d e f g h i j k l m n o p q r s t u v w x y z Ê ˜šœL™LJ™(—J˜šÏk ˜ Jšœœ ˜Jšœ œ˜,Jšœœœ"˜2Jšœœœ˜$Jšœœ˜ Jšœœœ ˜JšœœQ˜e—J˜šÏbœœ˜!Jšœœ2˜CJšœ˜Jšœ˜ —Jš˜Jšœœœ˜Jšœœœœ˜Jšœœ˜#Jšœœ ˜3Jšœ œ˜/Jšœ œ˜-Jšœœ˜Jšœœœœ˜*Jšœœœœ˜/J˜šœœ ˜Jšœ²™²—šœœ ˜Jšœ”™”J™—Jšœœœ%œ˜?š Ïnœœ œœ œœ˜>Jš œ œœœœ˜LJ˜J˜—š Ÿœœœ œœ˜>Jšœ¯™¯Jšœ`™`Jšœ œ™Jšœœœ™3Jšœœ™$šœœœ™Jšœœœœ™.Jšœ™—Jšœ˜—J˜š Ÿœœœœœœ˜EJšœA™AJšœœ˜Jšœ œ˜Jšœ œ˜Jšœ œœ˜š œœœœœœ˜NJšœ!˜&—Jšœœœ˜=š œœœœœœ˜HJšœ!˜&—J˜Jšœœ˜$Jšœ2œœœ˜iJšœ œ3œœ˜gJšœœœ˜%šœœœ˜"Jšœ(œ˜-šœœœ0˜VJšœ!˜!—Jšœ˜—š œœœœœœ˜OJšœ!˜&—Jšœœœ!˜LJ˜J˜—š Ÿœœœœœ˜Jšœœ œ ˜,J˜—šŸœœœœœœœ˜>Jšœ˜J˜—šŸœœœœœœœ˜>Jšœ˜J˜—šŸœœœœœœœ˜IJšœœ˜!J˜—šŸœœœœ œœœ˜DJšœ œ˜'J˜—Jšœœ˜ Jšœœ˜Jšœ1œ˜:Jšœœ˜&Jšœœ˜Jšœœ˜Jšœœ˜Jšœœ˜Jšœœ˜Jšœœ˜Jšœœ˜J˜šœœœ˜&JšœB˜Bšœœ ˜J˜—š˜J˜—Jšœ ˜ Jšœœœ˜Jšœœœ˜Jšœœœ˜Jšœœœ˜Jšœœœ˜Jšœœœ˜Jšœœœ˜Jšœ ˜ J˜ J˜ J˜ J˜ J˜ J˜ Jšœ˜—šœœ˜Jšœ œœœ˜>Jšœ œœœ˜>Jšœ œœœ˜>Jšœ œœœ˜>Jšœ œœœ˜>Jšœ œœœ˜>Jšœ œœœ˜>J˜—šœ˜Jšœ œœ˜5Jšœ œœ˜5Jšœ œœ˜5Jšœ œœ˜5Jšœ œœ˜5Jšœ œœ˜5Jšœ œœ˜5J˜—Jšœ œ˜J˜—J˜Jšœ˜J˜Jšœ˜ J˜Jšœ-˜-—…—ú&Ö