FILE: HashImpl.mesa
Last edited by Ousterhout, May 10, 1983 10:53 am
Barth, February 14, 1986 3:21:36 pm PST
Christian LeCocq May 16, 1986 11:54:40 am PDT
DIRECTORY
Hash,
Basics,
IO,
Rope,
RopeHash;
HashImpl: CEDAR PROGRAM
IMPORTS
Basics,
IO,
Rope,
RopeHash
EXPORTS
Hash =
BEGIN OPEN Hash;
The following constant gives the ratio of entries to buckets at which we decide that a hash table is too full and rebuild it with a larger size.
rebuildLimit: INT = 3;
NewTable: PUBLIC PROC[nBuckets: INT ← 16] RETURNS [table: Table] =
BEGIN
size: INT ← 16;
table ← NEW[TableRec];
Round the size up to the next power of two.
table.hashDivide ← 128*128;
table.hashDivide ← table.hashDivide*128*8;
table.hashMask ← 17B;
WHILE (size < nBuckets) DO
size ← size*2;
table.hashMask ← table.hashMask*2 + 1;
table.hashDivide ← table.hashDivide/2;
ENDLOOP;
table.nEntries ← 0;
table.buckets ← NEW[Buckets[size]];
table.firstEntry ← NIL;
RETURN [table];
END;
Hash: PROC[table: Table, name: Rope.ROPE] RETURNS [CARDINAL] =
This is a private procedure to compute a hash table bucket index based on a rope.
BEGIN
i: INT ← 0;
val: INT;
FOR j: INT IN [0..Rope.Length[name]) DO
i ← (10*i) + (Rope.Fetch[name, j] - '0);
ENDLOOP;
val ← ((i*1103515245) + 12345) / table.hashDivide;
val ← RopeHash.FromRope[rope: name];
RETURN [Basics.BITAND[table.hashMask, Basics.LowHalf[val]]];
END;
Find: PUBLIC PROC[table: Table, name: Rope.ROPE] RETURNS [entry: Entry] =
BEGIN
index: CARDINAL ← Hash[table, name];
See if the entry is already in the table.
entry ← table.buckets[index];
WHILE entry # NIL DO
IF Rope.Equal[name, entry.name] THEN RETURN [entry];
entry ← entry.nextInBucket;
ENDLOOP;
The entry wasn't found. Before allocating a new entry, see if we're overloading the buckets. If so, then make a bigger table.
IF table.nEntries > (rebuildLimit*table.buckets.length) THEN
IF table.buckets.length < 16384 THEN
BEGIN
Rebuild[table];
index ← Hash[table, name];
END;
entry ← NEW[EntryRec];
entry.name ← name;
entry.clientData ← NIL;
entry.nextInBucket ← table.buckets[index];
table.buckets[index] ← entry;
table.nEntries ← table.nEntries + 1;
entry.nextOverall ← table.firstEntry;
table.firstEntry ← entry;
RETURN [entry];
END;
Rebuild: PROC[table: Table] =
This is a private procedure that rebuilds a hash table, making it four times larger than it used to be.
BEGIN
old: REF Buckets ← table.buckets;
entry, next: Entry;
index: CARDINAL;
table.buckets ← NEW[Buckets[4*old.length]];
table.hashDivide ← table.hashDivide/4;
table.hashMask ← (table.hashMask * 4) + 3;
FOR i:CARDINAL IN [0..old.length) DO
entry ← old[i];
WHILE (entry # NIL) DO
next ← entry.nextInBucket;
index ← Hash[table, entry.name];
entry.nextInBucket ← table.buckets[index];
table.buckets[index] ← entry;
entry ← next;
ENDLOOP;
ENDLOOP;
END;
Enumerate: PUBLIC PROC[table: Table, pattern: Rope.ROPE, proc: EnumProc,
clientData: REF ANYNIL, errorStream: IO.STREAMNIL] =
BEGIN
prefix: Rope.ROPENIL;
postfix: Rope.ROPENIL;
length: INT ← Rope.Length[pattern];
pos, next, last: INT;
oldPos: INT ← 0;
c: CHARACTER;
gotRange: BOOLEANFALSE;
gotAny: BOOLEANTRUE;
gotStar: BOOLEANFALSE;
entry: Entry;
stream: IO.STREAM;
Check for the star that means we do pattern matching.
IF length > 0 THEN
BEGIN
IF Rope.Fetch[pattern, 0] = '* THEN
BEGIN
pattern ← Rope.Substr[pattern, 1, length-1];
length ← length-1;
gotStar ← TRUE;
END;
END;
Strip off the prefix (everything in the string up to the "<").
WHILE TRUE DO
pos ← Rope.SkipTo[pattern, oldPos, "\\<"];
prefix ← Rope.Concat[prefix, Rope.Substr[pattern, oldPos, pos-oldPos]];
oldPos ← pos + 1;
IF pos = length THEN EXIT;
IF pos >= length-1 THEN
BEGIN
IF errorStream # NIL THEN
IO.PutF[errorStream, "Can't end pattern with \"%c\".\n", IO.char[Rope.Fetch[pattern, length-1]]];
RETURN;
END;
c ← Rope.Fetch[pattern, pos];
IF c = '< THEN EXIT;
oldPos ← pos+2;
prefix ← Rope.Concat[prefix, Rope.Substr[pattern, pos+1, 1]];
ENDLOOP;
Grab up the range.
IF pos < length THEN
BEGIN
pos ← Rope.SkipTo[pattern, oldPos, ">"];
IF pos < length THEN
BEGIN
ENABLE ANY =>
BEGIN
IF errorStream # NIL THEN
IO.PutRope[errorStream, "Syntax error in range specifier.\n"];
GOTO quit;
END;
stream ← IO.RIS[Rope.Substr[pattern, oldPos, pos-oldPos]];
next ← IO.GetInt[stream];
IF IO.GetChar[stream] # ': THEN
BEGIN
IF errorStream # NIL THEN
IO.PutRope[errorStream, "Missing \":\" in range.\n"];
RETURN;
END;
last ← IO.GetInt[stream];
gotRange ← TRUE;
END
ELSE
BEGIN
IF errorStream # NIL THEN
IO.PutRope[errorStream, "Missing \">\" in range.\n"];
RETURN;
END;
oldPos ← pos+1;
Grab up the postfix (everything in the string after the ">").
Be sure to ignore any backslashes.
WHILE TRUE DO
pos ← Rope.SkipTo[pattern, oldPos, "\\"];
postfix ← Rope.Concat[postfix, Rope.Substr[pattern, oldPos, pos-oldPos]];
IF pos = length THEN EXIT;
IF pos >= length-1 THEN
BEGIN
IF errorStream # NIL THEN
IO.PutRope[errorStream, "Can't end pattern in \"\\\"\n."];
RETURN;
END;
postfix ← Rope.Concat[postfix, Rope.Substr[pattern, pos+1, 1]];
oldPos ← pos+2;
ENDLOOP;
END;
WHILE gotAny DO
First of all, generate a pattern to test.
IF gotRange THEN
BEGIN
pattern ← IO.PutFR["%s%d%s", IO.rope[prefix], IO.int[next], IO.rope[postfix]];
IF next > last THEN next ← next-1
ELSE IF next < last THEN next ← next+1
ELSE gotAny ← FALSE;
END
ELSE
BEGIN
gotAny ← FALSE;
pattern ← prefix;
END;
Now either look it up or test it against all entries in the table.
IF gotStar THEN
BEGIN
entry ← table.firstEntry;
WHILE entry # NIL DO
IF (length=0 OR (Rope.Find[entry.name, pattern, 0] >= 0)) AND (entry.clientData # NIL)
THEN proc[entry, clientData];
entry ← entry.nextOverall;
ENDLOOP;
END
ELSE
BEGIN
entry ← Find[table, pattern];
IF entry.clientData # NIL THEN proc[entry, clientData]
ELSE IF errorStream # NIL THEN
IO.PutF[errorStream, "%s isn't in table.\n", IO.rope[pattern]];
END;
ENDLOOP;
RETURN;
EXITS
quit => RETURN;
END;
Stats: PUBLIC PROC[table: Table, stream: IO.STREAM] =
BEGIN
count: ARRAY[0..10) OF INT;
overflow: INT ← 0;
max: INT ← 0;
j: INT;
entry: Entry;
FOR i:CARDINAL IN [0..10) DO count[i] ← 0 ENDLOOP;
FOR i:CARDINAL IN [0..table.buckets.length) DO
j ← 0;
entry ← table.buckets[i];
WHILE entry # NIL DO
j ← j+1;
entry ← entry.nextInBucket;
ENDLOOP;
IF j < 10 THEN count[j] ← count[j] + 1
ELSE overflow ← overflow + 1;
IF j > max THEN max ← j;
ENDLOOP;
FOR i:CARDINAL IN [0..10) DO
IO.PutF[stream, "Number of buckets with %d entries: %d.\n", IO.int[i], IO.int[count[i]]];
ENDLOOP;
IO.PutF[stream, "Number of buckets with > 9 entries: %d.\n", IO.int[overflow]];
IO.PutF[stream, "Largest bucket has %d entries.\n", IO.int[max]];
END;
END.