DIRECTORY IO USING [Close, STREAM, RIS, GetInt, EndOfStream, GetChar, PutF, int, real, Backup], RealFns USING [Power], RealOps USING [Float, RoundLI], BitTableLookup USING [Create, Insert, Write, Table, RowPrime, NumberHashFunctions], CommandTool USING [Parse, ArgumentVector], FS USING [StreamOpen], Rope USING [ROPE], Commander USING [CommandProc, Register]; BuildBitTableImpl: CEDAR PROGRAM IMPORTS IO, Commander, CommandTool, FS, BitTableLookup, RealOps, RealFns = BEGIN ROPE: TYPE = Rope.ROPE; STREAM: TYPE = IO.STREAM; WrongNumberOfArgs: ERROR = CODE; GetToken: PROC [s: STREAM, word: REF TEXT] RETURNS [w: REF TEXT] = { UnPrintable: PROC [c: CHAR] RETURNS [unPrintable: BOOL] = INLINE { unPrintable _ c IN ['\000 .. '\040]; }; c: CHAR; w _ word; WHILE UnPrintable[c _ s.GetChar[]] DO ENDLOOP; s.Backup[c]; w.length _ w.maxLength; FOR i: CARDINAL _ 0, i+1 DO c _ s.GetChar[ ! IO.EndOfStream => GOTO done]; IF UnPrintable[c] THEN GOTO done; IF i >= w.maxLength THEN { w _ NEW[TEXT[2*w.maxLength+1]]; w.length _ w.maxLength; }; w[i] _ c; REPEAT done => w.length _ i; ENDLOOP; }; AnalyzeTable: PROC [ t: BitTableLookup.Table, s: STREAM ] = { fineArray: ARRAY [0..100) OF INT _ ALL[0]; CoarseType: TYPE = ARRAY [0..13000) OF INT; coarseArray: REF CoarseType _ NEW[CoarseType]; lastBitPos: INT _ -1; thisBitPos: INT _ 0; bitDistance: INT _ 0; totalBits: INT _ 0; IO.PutF[s, "Analyzing table\n"]; FOR i : CARDINAL IN [0..13000) DO coarseArray[i] _ 0; ENDLOOP; FOR i: CARDINAL IN [0..t.rowSize) DO FOR j: CARDINAL IN [0..BitTableLookup.RowPrime) DO thisBit: BOOL; thisBitPos _ thisBitPos + 1; TRUSTED {thisBit _ t.bits[i][j]}; IF thisBit THEN { totalBits _ totalBits + 1; bitDistance _ thisBitPos - lastBitPos - 1; IF bitDistance < 100 THEN fineArray[bitDistance] _ fineArray[bitDistance] + 1 ELSE coarseArray[bitDistance/10] _ coarseArray[bitDistance/10]+1; lastBitPos _ thisBitPos; }; ENDLOOP; ENDLOOP; IO.PutF[ s, "A total of %g bits were turned on\n", IO.int[totalBits]]; FOR i: CARDINAL IN [0..100) DO IF fineArray[i] # 0 THEN IO.PutF[ s, "Number with distance %g is %g\n", IO.int[i], IO.int[fineArray[i]]]; ENDLOOP; FOR i: CARDINAL IN [0..13000) DO IF coarseArray[i] # 0 THEN IO.PutF[ s, "Number with distance %g-%g is %g\n", IO.int[i*10], IO.int[i*10+9],IO.int[fineArray[i]]]; ENDLOOP; }; FindGoodBitSize: PROC [desiredSize: INT] RETURNS [betterSize: INT] = { IsPrime: PROC[number: INT] RETURNS [prime: BOOL _ FALSE] = { pp: INT _ 3; WHILE pp*pp <= number DO IF (number MOD pp) = 0 THEN RETURN; pp _ pp + 2; ENDLOOP; prime _ TRUE; }; p: INT _ desiredSize / BitTableLookup.RowPrime; IF p MOD 2 = 0 THEN p _ p + 1; UNTIL IsPrime[p] DO p _ p + 2 ENDLOOP; betterSize _ p * BitTableLookup.RowPrime; }; BuildBitTable: Commander.CommandProc = { cmdLine: CommandTool.ArgumentVector _ CommandTool.Parse[cmd]; stderr, wordFile, bitFile: STREAM; wordsWanted: INT; bitSize: INT; word: REF TEXT _ NEW[TEXT[100]]; table: BitTableLookup.Table; numWords: INT _ 0; stderr _ cmd.out; IF cmdLine.argc # 4 THEN { IO.PutF[stderr, "Usage: BuildBitTable \n"]; RETURN; }; wordsWanted _ IO.GetInt[IO.RIS[cmdLine[1]]]; wordFile _ FS.StreamOpen[cmdLine.s[2]]; bitFile _ FS.StreamOpen[cmdLine.s[3], create]; IO.PutF[stderr, "Building a table to hold %g words.\n", IO.int[wordsWanted]]; IO.PutF[stderr, "The bit table package is compiled with %g hash functions.\n", IO.int[BitTableLookup.NumberHashFunctions[]]]; bitSize _ RealOps.RoundLI[0.4+RealOps.Float[BitTableLookup.NumberHashFunctions[]]*2.71828]; IO.PutF[stderr, "Building the table with %g bits per word.\n", IO.int[bitSize]]; bitSize _ FindGoodBitSize[bitSize*wordsWanted]; IO.PutF[stderr, "The table will have %g bits.\n", IO.int[bitSize]]; IO.PutF[stderr, "The probability of a false positive should be around %g.\n", IO.real[RealFns.Power[RealOps.Float[wordsWanted*BitTableLookup.NumberHashFunctions[]]/ RealOps.Float[bitSize], RealOps.Float[BitTableLookup.NumberHashFunctions[]]]]]; table _ BitTableLookup.Create[bitSize-1]; IO.PutF[stderr, "Reading and inserting words.\n"]; DO word _ GetToken[wordFile, word ! IO.EndOfStream => GOTO eof]; table.Insert[word]; numWords _ numWords + 1; REPEAT eof => NULL; ENDLOOP; table.Write[bitFile]; IO.Close[wordFile]; IO.Close[bitFile]; IO.PutF[stderr, "Inserted %g words.\n", IO.int[numWords]]; IO.PutF[stderr, "Ideally, %g out of the %g bits should be turned on.\n", IO.int[BitTableLookup.NumberHashFunctions[]*numWords], IO.int[bitSize]]; AnalyzeTable[table, stderr]; }; Commander.Register[ key: "BuildBitTable", proc: BuildBitTable, doc: "Builds a bit table dictionary."]; END. CHANGE LOG Created by Nix on February 26, 1985 2:59:07 pm PST, Created File: BuildBitTableImpl.mesa Last Edited by: Nix, October 27, 1983 1:36 pm Spreitzer, February 26, 1985 2:59:07 pm PST Utility for reading a token from the given stream into the given buffer. How's this for device independence? Spreitzer, February 26, 1985 2:58:39 pm PST Added TRUSTED to access bit table, due to Beach's change to explicit VM management. changes to: DIRECTORY, AnalyzeTable Κ˜– "Cedar" stylešœ™J™-Icode™+—unitšΟk ˜ Kšœœ œœ9˜UKšœœ ˜Kšœœ˜Kšœœ?˜SKšœ œ˜*Kšœœ˜Kšœœœ˜Kšœ œ˜(—šΟbœœ˜ Kšœœœ%˜K—Kš˜Kšœœœ˜Kšœœœœ˜Kšœœœ˜ K˜šΟnœœœœœœœœ˜DJšœH™Hš Ÿ œœœœœœ˜BJšœ#™#Jšœœ˜$J˜—Jšœœ˜J˜ Jšœœœ˜/J˜ J˜šœœ ˜Jšœœœ˜.Jšœœœ˜!šœœ˜Jšœœœ˜J˜J˜—J˜ š˜J˜—Jšœ˜—˜J˜—J˜—K˜šŸ œœœ˜=Kš œ œ œœœ˜*Kš œ œœ œœ˜+Kšœœœ ˜/Kšœ œ˜Kšœ œ˜Kšœ œ˜Kšœ œ˜Kšœ˜ šœœœ ˜!K˜Kšœ˜—šœœœ˜$šœœœœ˜3Kšœ œ˜K˜Kšœ˜!šœ œ˜K˜K˜*šœ˜K˜3—š˜K˜<—K˜K˜—Kšœ˜—Kšœ˜—Kšœ1œ˜Fšœœœ œ˜šœœ˜Kšœ-œ œ˜P—Kšœ˜—šœœœ œ˜!šœœ˜Kšœ0œ œ œ˜e—Kšœ˜—K˜K˜K˜—š Ÿœœœœœ˜Fš Ÿœœ œœ œœ˜