-- FILE: Hash.mesa
-- Last edited by Ousterhout, May 10, 1983 10:43 am

-- This file defines the interface to a simple hash table module that binds
-- strings to values.

DIRECTORY
   IO,
	Rope;

Hash:  CEDAR DEFINITIONS =
BEGIN

NewTable: PROC[nBuckets: INT ← 16] RETURNS [table: Table];
    -- Creates a new HashTable with the given number of buckets.  It's
    -- OK to start HashTables out small:  they will grow automatically
    -- as more entries are allocated with HashFind.

Find: PROC[table: Table, name: Rope.ROPE] RETURNS [entry: Entry]; 
    -- Finds an entry by the given name in the given table.  If there
    -- isn't any entry by that name, a new one is allocated.  If the table
    -- becomes too large, it is grown automatically.  For new entries, the
    -- clientData field is always NIL.
  
Enumerate: PROC[table: Table, pattern: Rope.ROPE, proc: EnumProc,
    clientData: REF ANY ← NIL, errorStream: IO.STREAM ← NIL];
    -- Enumerates all the entries in table with names that match the
    -- pattern by calling proc with the entry and clientData as arguments.
    -- If an error occurs and errorStream isn't NIL, then diagnostics are
    -- output.  The pattern can have any of the following forms:
    -- 1. "*x":  matches all elements of the table whose names contain
    -- the substring x.  Since this involves a complete search of the entire
    -- hash table, it may be slow.
    -- 2. "a<x:y>b": matches names of the form acb where c gets successive
    -- numerical values between x and y, inclusive.  Each string will be
    -- looked up in the hash table.  This is relatively fast.  Use backslash
    -- as a quote character to get < and > characters in the string without
    -- special interpretation.
    -- 3. Strings not in the above categories:  they are just looked up
    -- as they are.
    -- The * and <> forms may be combined.  Note: a pattern of "*" will
    -- enumerate all of the entries in the table.  TRUE is returned if the
    -- pattern was correct, FALSE if it didn't make sense.
        
Stats: PUBLIC PROC[table: Table, stream: IO.STREAM];
    -- Print stats about bucket utilization on stream.

Entry: TYPE = REF EntryRec;
EntryRec: TYPE = RECORD [

    -- Each entry points to its reference rope, a piece of client data, and a
    -- link to the next entry in the same bucket.  There is also a pointer
    -- to the next entry in the overall list for the whole table (see the
    -- comment below under tables).  The information is visible
    -- to clients, but should be read-only except for clientData.
    
    name: Rope.ROPE,
    clientData: REF ANY,
    nextInBucket: Entry,
    nextOverall: Entry
    ];

EnumProc: TYPE = PROC[entry: Entry, clientData: REF ANY];
    -- Client procedure called during hash table enumeration.

Table: TYPE = REF TableRec;
TableRec: TYPE = RECORD [

    -- A hash table contains a pointer to an array (sequence) of buckets,
    -- each of which is a linked list of entries that hash to the same bucket.
    -- In addition, the table contains a count of the total number of entries
    -- we've made in this table (so we know when it's getting too full),
    -- and some random information used by the hashing function.  In
    -- addition, the entries are linked together in a list.  This isn't strictly
    -- necessary, since the entries can all be found by searching the
    -- buckets, but it allows searching in order of allocation, which tends
    -- to get things that are close together in virtual memory (for less
    -- page thrashing).
    
    buckets: REF Buckets,
    nEntries: CARDINAL,
    hashDivide: INT,
    hashMask: CARDINAL,
    firstEntry: Entry
    ];

Buckets: TYPE = RECORD [
    s: SEQUENCE length: CARDINAL OF Entry
    ];

END.