DIRECTORY
Ascii USING [Lower],
CountedVM USING [Allocate, Handle],
IO USING [STREAM, UnsafeGetBlock, UnsafePutBlock],
Basics USING [BITXOR, bytesPerWord],
RefText USING [TrustTextAsRope],
Rope USING [ROPE, QFetch],
BitTableLookup USING [Table, TableContents, FailureTypes, RowPrime, RowContents, RowPointer];
=
BEGIN
ROPE: TYPE = Rope.ROPE;
STREAM: TYPE = IO.STREAM;
Table: TYPE = BitTableLookup.Table;
TableContents: TYPE = BitTableLookup.TableContents;
RowContents: TYPE = BitTableLookup.RowContents;
RowPointer: TYPE = BitTableLookup.RowPointer;
BPW: INT = Basics.bytesPerWord;
BPI: INT = Basics.bytesPerWord * INT.SIZE;
BPC: INT = Basics.bytesPerWord * CARDINAL.SIZE;
maxTableSize:
INT ← 268238848;
Current implementation restrictions impose this bound on the maximum size of a bit table. The given size should suffice for tables representing a set of about 14,000,000 words.
IAmNotACrook:
INT = 2789127;
This magic number is written at the beginning and end of every bit table written to a stream, and its presence is checked whenever a table is read.
Failed: PUBLIC ERROR[why: BitTableLookup.FailureTypes] = CODE;
BitToRow:
PROC[bitSize:
INT]
RETURNS [rowSize:
INT] =
INLINE {
rowSize ← (bitSize+(RowContents.SIZE*BPW*8) - 1) / (RowContents.SIZE*BPW*8);
};
Create:
PUBLIC
PROC [ size:
INT ]
RETURNS [ table: Table ] = {
Creates a table with the given number of bits of capacity. The number of bits should be a prime number that is approximately 20 times the number of words in the given table.
ERRORS: Raises Failed[TooLarge] if the table size is greater than 1,048,576 bits.
rowSize: INT ← BitToRow[size] ;
IF size > maxTableSize THEN ERROR Failed[TooLarge];
table ← NEW[TableContents[rowSize]];
FOR i: INT IN [0..rowSize) DO
table.bits[i] ← NEW[RowContents ← ALL[FALSE]];
ENDLOOP;
};
Read:
PUBLIC
PROC [ in:
STREAM ]
RETURNS [ table: Table ] =
TRUSTED {
Reads in a Table from the given stream. One had better be there.
doWeBelieveHim: INT;
rowSize: CARDINAL;
bytesPerRow: INT;
tableBase: LONG POINTER;
IF
IO.UnsafeGetBlock[in, [
LOOPHOLE[
LONG[@doWeBelieveHim]], 0,
BPI]] #
BPI
THEN
ERROR Failed[BadTable];
IF doWeBelieveHim # IAmNotACrook THEN ERROR Failed[BadTable];
IF
IO.UnsafeGetBlock[ in, [
LOOPHOLE[
LONG[@rowSize]], 0,
BPC]] #
BPC
THEN
ERROR Failed[BadTable];
table ← NEW[TableContents[rowSize]];
table.vmHandle ← CountedVM.Allocate[TableContents.SIZE+rowSize*RowPointer.SIZE+rowSize*RowContents.SIZE];
tableBase ← LOOPHOLE[table.vmHandle.pointer + TableContents.SIZE + rowSize*RowPointer.SIZE];
bytesPerRow ← RowContents.SIZE * BPW;
FOR i:
CARDINAL
IN [0..rowSize)
DO
table.bits[i] ← tableBase+i*RowContents.SIZE;
IF
IO.UnsafeGetBlock[in, [
LOOPHOLE[table.bits[i]], 0, bytesPerRow]] # bytesPerRow
THEN
Failed[BadTable];
ENDLOOP;
IF
IO.UnsafeGetBlock[ in, [
LOOPHOLE[
LONG[@doWeBelieveHim]], 0,
BPI]] #
BPI
THEN
ERROR Failed[BadTable];
IF doWeBelieveHim # IAmNotACrook THEN ERROR Failed[BadTable];
};
Write:
PUBLIC
PROC [ table: Table, out:
STREAM ] =
TRUSTED {
Writes out a Table to the given stream.
validationStamp: INT ← IAmNotACrook;
s: INT;
rowSize: CARDINAL ← table.rowSize;
IO.UnsafePutBlock[ out, [LOOPHOLE[LONG[@validationStamp]], 0, BPI]];
IO.UnsafePutBlock[ out, [LOOPHOLE[LONG[@rowSize]], 0, BPC]];
s ← RowContents.SIZE * BPW;
FOR i:
CARDINAL
IN [0..rowSize)
DO
IO.UnsafePutBlock[out, [LOOPHOLE[table.bits[i]], 0, s]];
ENDLOOP;
IO.UnsafePutBlock[ out, [LOOPHOLE[LONG[@validationStamp]], 0, BPI]];
};
Insert:
PUBLIC
PROC [table: Table, word:
REF
TEXT] = {
Inserts the word into the given table.
[] ← InsertOrLookup[ table, word, FALSE ];
};
Lookup:
PUBLIC
PROC [table: Table, word:
REF
TEXT]
RETURNS [member:
BOOLEAN] = {
Looks up the word in the table and returns TRUE iff it thinks that the word is in there.
member ← InsertOrLookup[ table, word, TRUE ];
};
CharTable:
ARRAY [0..26)
OF
CARDINAL = [
a b c d e f g h i j
LOOPHOLE[-211], LOOPHOLE[-332], 449, 588, 7102, LOOPHOLE[-854], LOOPHOLE[-991], LOOPHOLE[-1118], 1278, 1433,
k l m n o p q r s t
LOOPHOLE[-1568], LOOPHOLE[-1709], 1874, 2027, 2180, LOOPHOLE[-2341], LOOPHOLE[-2478], LOOPHOLE[-2671], 2798, 2963,
u v w x y z
LOOPHOLE[-3164], LOOPHOLE[-3320], LOOPHOLE[-3469], 3624, 3793, 3944
];
NumberHashFunctions:
PUBLIC
PROC []
RETURNS [numberHashFunctions:
INT] = {
numberHashFunctions ← 7;
};
InsertOrLookup:
PROC [table: Table, word:
REF
TEXT, lookup:
BOOLEAN ]
RETURNS [member:
BOOLEAN ←
FALSE] =
TRUSTED {
M:
PROC[n, m:
CARDINAL]
RETURNS [r:
CARDINAL] =
TRUSTED
INLINE {
r ← n - m;
};
P:
PROC[n, m:
CARDINAL]
RETURNS [r:
CARDINAL] =
TRUSTED
INLINE {
r ← n - m;
};
R:
PROC[n, m:
CARDINAL]
RETURNS [r:
CARDINAL] =
TRUSTED
INLINE {
r ← m - n;
};
X:
PROC[n, m:
CARDINAL]
RETURNS [r:
CARDINAL] =
TRUSTED
INLINE {
r ← Basics.BITXOR[n,m];
};
R1:
PROC[n:
CARDINAL]
RETURNS [r:
CARDINAL] =
TRUSTED
INLINE {
IF n >= 100000B THEN r ← n*2+1 ELSE r ← n*2;
};
R3:
PROC[n:
CARDINAL]
RETURNS [r:
CARDINAL] =
TRUSTED
INLINE {
r ← n*8+n/20000B;
};
R5:
PROC[n:
CARDINAL]
RETURNS [r:
CARDINAL] =
TRUSTED
INLINE {
r ← n*32+n/4000B;
};
Word:
PROC[n:
CARDINAL]
RETURNS [wordOffset:
CARDINAL] =
TRUSTED
INLINE {
wordOffset ← n MOD table.rowSize;
};
Bit:
PROC[n:
CARDINAL]
RETURNS [bitNum:
CARDINAL] =
TRUSTED
INLINE {
bitNum ← n MOD BitTableLookup.RowPrime;
};
char: CHAR;
c, p: CARDINAL;
hash1, hash2, hash3, hash4, hash5, hash6, hash7: CARDINAL;
x1, x2, x3, x4, x5, x6, x7: CARDINAL;
hash1 ← LOOPHOLE[953];
hash2 ← LOOPHOLE[-1667];
hash3 ← LOOPHOLE[2441];
hash4 ← LOOPHOLE[-3271];
hash5 ← LOOPHOLE[4079];
hash6 ← LOOPHOLE[-4951];
hash7 ← LOOPHOLE[5807];
p ← 359;
FOR j:
CARDINAL
IN [0..word.length)
DO
char ← Ascii.Lower[Rope.QFetch[RefText.TrustTextAsRope[word], j]];
IF char
IN ['a..'z]
THEN
c ← CharTable[char - 'a]
p ← p + 1009;
x1 ← M[ X[ R5[hash2], c], p];
x2 ← R[ M[ R3[hash3], c], p];
x3 ← X[ P[ R5[hash4], c], p];
x4 ← P[ R[ R3[hash5], c], p];
x5 ← P[ X[ R5[hash6], c], p];
x6 ← M[ M[ R3[hash7], c], p];
x7 ← R[ P[ R5[hash1], c], p];
hash1 ← x1;
hash2 ← x2;
hash3 ← x3;
hash4 ← x4;
hash5 ← x5;
hash6 ← x6;
hash7 ← x7;
ENDLOOP;
IF lookup
THEN {
IF ~table.bits[LOOPHOLE[Word[hash1]]][Bit[hash7]] THEN RETURN;
IF ~table.bits[LOOPHOLE[Word[hash2]]][Bit[hash6]] THEN RETURN;
IF ~table.bits[LOOPHOLE[Word[hash3]]][Bit[hash5]] THEN RETURN;
IF ~table.bits[LOOPHOLE[Word[hash4]]][Bit[hash4]] THEN RETURN;
IF ~table.bits[LOOPHOLE[Word[hash5]]][Bit[hash3]] THEN RETURN;
IF ~table.bits[LOOPHOLE[Word[hash6]]][Bit[hash2]] THEN RETURN;
IF ~table.bits[LOOPHOLE[Word[hash7]]][Bit[hash1]] THEN RETURN;
}
ELSE {
table.bits[LOOPHOLE[Word[hash1]]][Bit[hash7]] ← TRUE;
table.bits[LOOPHOLE[Word[hash2]]][Bit[hash6]] ← TRUE;
table.bits[LOOPHOLE[Word[hash3]]][Bit[hash5]] ← TRUE;
table.bits[LOOPHOLE[Word[hash4]]][Bit[hash4]] ← TRUE;
table.bits[LOOPHOLE[Word[hash5]]][Bit[hash3]] ← TRUE;
table.bits[LOOPHOLE[Word[hash6]]][Bit[hash2]] ← TRUE;
table.bits[LOOPHOLE[Word[hash7]]][Bit[hash1]] ← TRUE;
};
member ← TRUE;
};