-- FILE: HashImpl.mesa -- Last edited by Ousterhout, May 10, 1983 10:53 am -- Barth, February 14, 1986 3:21:36 pm PST DIRECTORY Hash, Basics, IO, Rope; HashImpl: CEDAR PROGRAM IMPORTS Basics, IO, Rope EXPORTS Hash = BEGIN OPEN Hash; -- The following constant gives the ratio of entries to buckets at which -- we decide that a hash table is too full and rebuild it with a larger size. rebuildLimit: INT = 3; NewTable: PUBLIC PROC[nBuckets: INT _ 16] RETURNS [table: Table] = BEGIN size: INT _ 16; table _ NEW[TableRec]; -- Round the size up to the next power of two. table.hashDivide _ 128*128; table.hashDivide _ table.hashDivide*128*8; table.hashMask _ 17B; WHILE (size < nBuckets) DO size _ size*2; table.hashMask _ table.hashMask*2 + 1; table.hashDivide _ table.hashDivide/2; ENDLOOP; table.nEntries _ 0; table.buckets _ NEW[Buckets[size]]; table.firstEntry _ NIL; RETURN [table]; END; Hash: PROC[table: Table, name: Rope.ROPE] RETURNS [CARDINAL] = -- This is a private procedure to compute a hash table bucket -- index based on a rope. BEGIN i: INT _ 0; val: INT; FOR j: INT IN [0..Rope.Length[name]) DO i _ (10*i) + (Rope.Fetch[name, j] - '0); ENDLOOP; val _ ((i*1103515245) + 12345) / table.hashDivide; RETURN [Basics.BITAND[table.hashMask, Basics.LowHalf[val]]]; END; Find: PUBLIC PROC[table: Table, name: Rope.ROPE] RETURNS [entry: Entry] = BEGIN index: CARDINAL _ Hash[table, name]; -- See if the entry is already in the table. entry _ table.buckets[index]; WHILE entry # NIL DO IF Rope.Equal[name, entry.name] THEN RETURN [entry]; entry _ entry.nextInBucket; ENDLOOP; -- The entry wasn't found. Before allocating a new entry, -- see if we're overloading the buckets. If so, then make a -- bigger table. IF table.nEntries > (rebuildLimit*table.buckets.length) THEN BEGIN Rebuild[table]; index _ Hash[table, name]; END; entry _ NEW[EntryRec]; entry.name _ name; entry.clientData _ NIL; entry.nextInBucket _ table.buckets[index]; table.buckets[index] _ entry; table.nEntries _ table.nEntries + 1; entry.nextOverall _ table.firstEntry; table.firstEntry _ entry; RETURN [entry]; END; Rebuild: PROC[table: Table] = -- This is a private procedure that rebuilds a hash table, making it -- four times larger than it used to be. BEGIN old: REF Buckets _ table.buckets; entry, next: Entry; index: CARDINAL; table.buckets _ NEW[Buckets[4*old.length]]; table.hashDivide _ table.hashDivide/4; table.hashMask _ (table.hashMask * 4) + 3; FOR i:CARDINAL IN [0..old.length) DO entry _ old[i]; WHILE (entry # NIL) DO next _ entry.nextInBucket; index _ Hash[table, entry.name]; entry.nextInBucket _ table.buckets[index]; table.buckets[index] _ entry; entry _ next; ENDLOOP; ENDLOOP; END; Enumerate: PUBLIC PROC[table: Table, pattern: Rope.ROPE, proc: EnumProc, clientData: REF ANY _ NIL, errorStream: IO.STREAM _ NIL] = BEGIN prefix: Rope.ROPE _ NIL; postfix: Rope.ROPE _ NIL; length: INT _ Rope.Length[pattern]; pos, next, last: INT; oldPos: INT _ 0; c: CHARACTER; gotRange: BOOLEAN _ FALSE; gotAny: BOOLEAN _ TRUE; gotStar: BOOLEAN _ FALSE; entry: Entry; stream: IO.STREAM; -- Check for the star that means we do pattern matching. IF length > 0 THEN BEGIN IF Rope.Fetch[pattern, 0] = '* THEN BEGIN pattern _ Rope.Substr[pattern, 1, length-1]; length _ length-1; gotStar _ TRUE; END; END; -- Strip off the prefix (everything in the string up to the "<"). WHILE TRUE DO pos _ Rope.SkipTo[pattern, oldPos, "\\<"]; prefix _ Rope.Concat[prefix, Rope.Substr[pattern, oldPos, pos-oldPos]]; oldPos _ pos + 1; IF pos = length THEN EXIT; IF pos >= length-1 THEN BEGIN IF errorStream # NIL THEN IO.PutF[errorStream, "Can't end pattern with \"%c\".\n", IO.char[Rope.Fetch[pattern, length-1]]]; RETURN; END; c _ Rope.Fetch[pattern, pos]; IF c = '< THEN EXIT; oldPos _ pos+2; prefix _ Rope.Concat[prefix, Rope.Substr[pattern, pos+1, 1]]; ENDLOOP; -- Grab up the range. IF pos < length THEN BEGIN pos _ Rope.SkipTo[pattern, oldPos, ">"]; IF pos < length THEN BEGIN ENABLE ANY => BEGIN IF errorStream # NIL THEN IO.PutRope[errorStream, "Syntax error in range specifier.\n"]; GOTO quit; END; stream _ IO.RIS[Rope.Substr[pattern, oldPos, pos-oldPos]]; next _ IO.GetInt[stream]; IF IO.GetChar[stream] # ': THEN BEGIN IF errorStream # NIL THEN IO.PutRope[errorStream, "Missing \":\" in range.\n"]; RETURN; END; last _ IO.GetInt[stream]; gotRange _ TRUE; END ELSE BEGIN IF errorStream # NIL THEN IO.PutRope[errorStream, "Missing \">\" in range.\n"]; RETURN; END; oldPos _ pos+1; -- Grab up the postfix (everything in the string after the ">"). -- Be sure to ignore any backslashes. WHILE TRUE DO pos _ Rope.SkipTo[pattern, oldPos, "\\"]; postfix _ Rope.Concat[postfix, Rope.Substr[pattern, oldPos, pos-oldPos]]; IF pos = length THEN EXIT; IF pos >= length-1 THEN BEGIN IF errorStream # NIL THEN IO.PutRope[errorStream, "Can't end pattern in \"\\\"\n."]; RETURN; END; postfix _ Rope.Concat[postfix, Rope.Substr[pattern, pos+1, 1]]; oldPos _ pos+2; ENDLOOP; END; WHILE gotAny DO -- First of all, generate a pattern to test. IF gotRange THEN BEGIN pattern _ IO.PutFR["%s%d%s", IO.rope[prefix], IO.int[next], IO.rope[postfix]]; IF next > last THEN next _ next-1 ELSE IF next < last THEN next _ next+1 ELSE gotAny _ FALSE; END ELSE BEGIN gotAny _ FALSE; pattern _ prefix; END; -- Now either look it up or test it against all entries in the table. IF gotStar THEN BEGIN entry _ table.firstEntry; WHILE entry # NIL DO IF (length=0 OR (Rope.Find[entry.name, pattern, 0] >= 0)) AND (entry.clientData # NIL) THEN proc[entry, clientData]; entry _ entry.nextOverall; ENDLOOP; END ELSE BEGIN entry _ Find[table, pattern]; IF entry.clientData # NIL THEN proc[entry, clientData] ELSE IF errorStream # NIL THEN IO.PutF[errorStream, "%s isn't in table.\n", IO.rope[pattern]]; END; ENDLOOP; RETURN; EXITS quit => RETURN; END; Stats: PUBLIC PROC[table: Table, stream: IO.STREAM] = BEGIN count: ARRAY[0..10) OF INT; overflow: INT _ 0; max: INT _ 0; j: INT; entry: Entry; FOR i:CARDINAL IN [0..10) DO count[i] _ 0 ENDLOOP; FOR i:CARDINAL IN [0..table.buckets.length) DO j _ 0; entry _ table.buckets[i]; WHILE entry # NIL DO j _ j+1; entry _ entry.nextInBucket; ENDLOOP; IF j < 10 THEN count[j] _ count[j] + 1 ELSE overflow _ overflow + 1; IF j > max THEN max _ j; ENDLOOP; FOR i:CARDINAL IN [0..10) DO IO.PutF[stream, "Number of buckets with %d entries: %d.\n", IO.int[i], IO.int[count[i]]]; ENDLOOP; IO.PutF[stream, "Number of buckets with > 9 entries: %d.\n", IO.int[overflow]]; IO.PutF[stream, "Largest bucket has %d entries.\n", IO.int[max]]; END; END.