File: BuildBitTableImpl.mesa
Last Edited by: Nix, October 27, 1983 1:36 pm
Spreitzer, February 26, 1985 2:59:07 pm PST
DIRECTORY
IO USING [Close, STREAM, RIS, GetInt, EndOfStream, GetChar, PutF, int, real, Backup],
RealFns USING [Power],
RealOps USING [Float, RoundLI],
BitTableLookup USING [Create, Insert, Write, Table, RowPrime, NumberHashFunctions],
CommandTool USING [Parse, ArgumentVector],
FS USING [StreamOpen],
Rope USING [ROPE],
Commander USING [CommandProc, Register];
BuildBitTableImpl: CEDAR PROGRAM
IMPORTS IO, Commander, CommandTool, FS, BitTableLookup, RealOps, RealFns = 
BEGIN
ROPE: TYPE = Rope.ROPE;
STREAM: TYPE = IO.STREAM;
WrongNumberOfArgs: ERROR = CODE;
GetToken: PROC [s: STREAM, word: REF TEXT] RETURNS [w: REF TEXT] = {
Utility for reading a token from the given stream into the given buffer.
UnPrintable: PROC [c: CHAR] RETURNS [unPrintable: BOOL] = INLINE {
How's this for device independence?
unPrintable ← c IN ['\000 .. '\040];
};
c: CHAR;
w ← word;
WHILE UnPrintable[c ← s.GetChar[]] DO ENDLOOP;
s.Backup[c];
w.length ← w.maxLength;
FOR i: CARDINAL ← 0, i+1 DO
c ← s.GetChar[ ! IO.EndOfStream => GOTO done];
IF UnPrintable[c] THEN GOTO done;
IF i >= w.maxLength THEN {
w ← NEW[TEXT[2*w.maxLength+1]];
w.length ← w.maxLength;
};
w[i] ← c;
REPEAT
done => w.length ← i;
ENDLOOP;
};
AnalyzeTable: PROC [ t: BitTableLookup.Table, s: STREAM ] = {
fineArray: ARRAY [0..100) OF INTALL[0];
CoarseType: TYPE = ARRAY [0..13000) OF INT;
coarseArray: REF CoarseType ← NEW[CoarseType];
lastBitPos: INT ← -1;
thisBitPos: INT ← 0;
bitDistance: INT ← 0;
totalBits: INT ← 0;
IO.PutF[s, "Analyzing table\n"];
FOR i : CARDINAL IN [0..13000) DO
coarseArray[i] ← 0;
ENDLOOP;
FOR i: CARDINAL IN [0..t.rowSize) DO
FOR j: CARDINAL IN [0..BitTableLookup.RowPrime) DO
thisBit: BOOL;
thisBitPos ← thisBitPos + 1;
TRUSTED {thisBit ← t.bits[i][j]};
IF thisBit THEN {
totalBits ← totalBits + 1;
bitDistance ← thisBitPos - lastBitPos - 1;
IF bitDistance < 100 THEN
fineArray[bitDistance] ← fineArray[bitDistance] + 1
ELSE
coarseArray[bitDistance/10] ← coarseArray[bitDistance/10]+1;
lastBitPos ← thisBitPos;
};
ENDLOOP;
ENDLOOP;
IO.PutF[ s, "A total of %g bits were turned on\n", IO.int[totalBits]];
FOR i: CARDINAL IN [0..100) DO
IF fineArray[i] # 0 THEN
IO.PutF[ s, "Number with distance %g is %g\n", IO.int[i], IO.int[fineArray[i]]];
ENDLOOP;
FOR i: CARDINAL IN [0..13000) DO
IF coarseArray[i] # 0 THEN
IO.PutF[ s, "Number with distance %g-%g is %g\n", IO.int[i*10], IO.int[i*10+9],IO.int[fineArray[i]]];
ENDLOOP;
};
FindGoodBitSize: PROC [desiredSize: INT] RETURNS [betterSize: INT] = {
IsPrime: PROC[number: INT] RETURNS [prime: BOOLFALSE] = {
pp: INT ← 3;
WHILE pp*pp <= number DO
IF (number MOD pp) = 0 THEN
RETURN;
pp ← pp + 2;
ENDLOOP;
prime ← TRUE;
};
p: INT ← desiredSize / BitTableLookup.RowPrime;
IF p MOD 2 = 0 THEN p ← p + 1;
UNTIL IsPrime[p] DO p ← p + 2 ENDLOOP;
betterSize ← p * BitTableLookup.RowPrime;
};
BuildBitTable: Commander.CommandProc = {
cmdLine: CommandTool.ArgumentVector ← CommandTool.Parse[cmd];
stderr, wordFile, bitFile: STREAM;
wordsWanted: INT;
bitSize: INT;
word: REF TEXTNEW[TEXT[100]];
table: BitTableLookup.Table;
numWords: INT ← 0;
stderr ← cmd.out;
IF cmdLine.argc # 4 THEN {
IO.PutF[stderr, "Usage: BuildBitTable <number of words in list> <word file> <bit file>\n"];
RETURN;
};
wordsWanted ← IO.GetInt[IO.RIS[cmdLine[1]]];
wordFile ← FS.StreamOpen[cmdLine.s[2]];
bitFile ← FS.StreamOpen[cmdLine.s[3], create];
IO.PutF[stderr, "Building a table to hold %g words.\n", IO.int[wordsWanted]];
IO.PutF[stderr, "The bit table package is compiled with %g hash functions.\n", IO.int[BitTableLookup.NumberHashFunctions[]]];
bitSize ← RealOps.RoundLI[0.4+RealOps.Float[BitTableLookup.NumberHashFunctions[]]*2.71828];
IO.PutF[stderr, "Building the table with %g bits per word.\n", IO.int[bitSize]];
bitSize ← FindGoodBitSize[bitSize*wordsWanted];
IO.PutF[stderr, "The table will have %g bits.\n", IO.int[bitSize]];
IO.PutF[stderr, "The probability of a false positive should be around %g.\n",
IO.real[RealFns.Power[RealOps.Float[wordsWanted*BitTableLookup.NumberHashFunctions[]]/ RealOps.Float[bitSize], RealOps.Float[BitTableLookup.NumberHashFunctions[]]]]];
table ← BitTableLookup.Create[bitSize-1];
IO.PutF[stderr, "Reading and inserting words.\n"];
DO
word ← GetToken[wordFile, word ! IO.EndOfStream => GOTO eof];
table.Insert[word];
numWords ← numWords + 1;
REPEAT
eof => NULL;
ENDLOOP;
table.Write[bitFile];
IO.Close[wordFile];
IO.Close[bitFile];
IO.PutF[stderr, "Inserted %g words.\n", IO.int[numWords]];
IO.PutF[stderr, "Ideally, %g out of the %g bits should be turned on.\n", IO.int[BitTableLookup.NumberHashFunctions[]*numWords], IO.int[bitSize]];
AnalyzeTable[table, stderr];
};
Commander.Register[ key: "BuildBitTable", proc: BuildBitTable, doc: "Builds a bit table dictionary."];
END.
CHANGE LOG
Created by Nix on February 26, 1985 2:59:07 pm PST, Created
Spreitzer, February 26, 1985 2:58:39 pm PST
Added TRUSTED to access bit table, due to Beach's change to explicit VM management.
changes to: DIRECTORY, AnalyzeTable