-- File: RedBlackTreeRefImpl.mesa
-- Last edited by:
--   MBrown on March 2, 1983 6:43 pm


-- This is an "ordered symbol table" implementation, using 2-3-4 trees.  This version uses
--REF (ANY) as the "item" type, giving up some efficiency for extra flexibility.  It also
--represents each table as an object monitor.

--      Notes on the data structure

--  The color of a node should be thought of as the color of the edge coming into the
--node. The general color invariants for red-black trees are that (1) all external nodes
--are black, while internals are red or black, and (2) all paths from an internal node
--to an external node contain the same number of black arcs.

--  The tree edges of a 2-3-4 tree become black arcs in the binarized red-black
--representation, while the internal 2-, 3-, and 4-nodes are represented by connected
--red subtrees: the degenerate one with one node, both of the possible two node trees,
--and the completely balanced tree on three nodes.  Another way of stating this
--invariant is to say that no path contains two consecutive red arcs.

--  The top-down insertion algorithm below is from Program 5 of "A Dichromatic Framework
--for Balanced Trees" by Leo Guibas and Bob Sedgewick.  The deletion algorithm
--incorporates several changes to the algorithm in that paper.  This Cedar program has
--evolved from Dave Gifford's StringStorageImpl.mesa.


  DIRECTORY
    Environment USING [Comparison],
    OrderedSymbolTableRef USING[Key, Item],
    SafeStorage;

RedBlackTreeRefImpl: CEDAR MONITOR LOCKS t USING t: Table
  IMPORTS
    SafeStorage
  EXPORTS
    OrderedSymbolTableRef
  = BEGIN

  systemZone: ZONE = SafeStorage.GetSystemZone[];

  RedBlack: TYPE = BOOL;
  Red: BOOL = TRUE;
  Black: BOOL = FALSE;
  Comparison: TYPE = Environment.Comparison;

  DuplicateKey: PUBLIC ERROR = CODE;
  BadTable: PUBLIC ERROR = CODE;
    
  Table: TYPE = REF TableObject;
  TableObject: PUBLIC TYPE = MONITORED RECORD [
    h: Node,
      -- Permanent root of red-black tree, with (in effect) key = MinusInfinity.  Simplifies
      --rebalancing.
    z, y: Node,
      -- z represents NIL.  y represents both children of NIL.  These simplify rebalancing, too.
      -- z.Item = NIL.
    numItems: INT,
      -- Number of items currently stored in the table.
    compareProc: PROC [Item, Item] RETURNS [Comparison],
      -- Tells how to compare two items stored in the table
    nodeZone: ZONE
      -- Where to get storage for new items.
    ];

  Node: TYPE = REF NodeRec;
  NodeRec: TYPE = RECORD [
    rbColor: RedBlack ← NULL,
    rbRLink, rbLLink: Node ← NIL,
    item: Item ← NIL];

  Item: TYPE = OrderedSymbolTableRef.Item;
  Key: TYPE = OrderedSymbolTableRef.Key;

  -- Procedures exported to OrderedSymbolTableRef

  CreateTable: PUBLIC PROC[
    compareProc: PROC [Item, Item] RETURNS [Comparison],
    nodeZone: ZONE, tableZone: ZONE]
    RETURNS [t: Table] = {
    IF tableZone = NIL THEN tableZone ← systemZone;
    IF nodeZone = NIL THEN nodeZone ← systemZone;
    t ← tableZone.NEW[TableObject ←
      [h: nodeZone.NEW[NodeRec ← [rbColor: Black]],
       z: nodeZone.NEW[NodeRec ← [rbColor: Black]],
       y: nodeZone.NEW[NodeRec ← [rbColor: Red]],
       numItems: 0, compareProc: compareProc, nodeZone: nodeZone]];
    t.h.rbRLink ← t.z;
    t.z.rbRLink ← t.z.rbLLink ← t.y;
    };--CreateTable

  Size: PUBLIC ENTRY PROC[t: Table] RETURNS [nItems: LONG INTEGER] = {
    RETURN[t.numItems];
    };--Size

  DestroyTable: PUBLIC ENTRY PROC [t: Table] = TRUSTED {
    -- This should be "improved" by traversing the tree and undoing all the links?
    FREE[@t.h];  FREE[@t.z];  FREE[@t.y];
    t.numItems ← 0;
    t.compareProc ← NIL;
    --FREE[@t];
    };--DestroyTable

  DeleteAllItems: PUBLIC ENTRY PROC [t: Table] = TRUSTED {
    -- This should be "improved" by traversing the tree and undoing all the links?
    FREE[@t.h.rbRLink];
    t.h.rbRLink ← t.z;
    t.numItems ← 0;
    };--DeleteAllItems;

  Insert: PUBLIC ENTRY PROC[t: Table, insertItem: Item] = {
    -- Requires t.h.rbColor = t.h.rbRLink.rbColor = z.rbColor = Black, t.y.rbColor = Red.
    x, gg, g, f: Node;
    z: Node = t.z;
    keyAlreadyPresent: BOOL ← TRUE;
    c: Comparison ← greater; --since actual tree sits in right subtree of header node...
    -- search the tree, rebalancing on the way down.
    f ← t.h; x ← f.rbRLink;
    DO
      IF (x.rbLLink.rbColor=Red) AND (x.rbRLink.rbColor=Red) THEN {
        IF x=z THEN {
          -- an external node has been found; check to be sure that a duplicate key is not present.
	  IF c = equal THEN EXIT;
	  -- perform insertion.
          x ← t.nodeZone.NEW[NodeRec ← [rbLLink: z, rbRLink: z, item: insertItem]];
	  keyAlreadyPresent ← FALSE;
          IF c = less THEN f.rbLLink ← x ELSE f.rbRLink ← x;
          c ← equal
	  };--IF x is external
        -- do color flip.
        x.rbLLink.rbColor ← Black;  x.rbRLink.rbColor ← Black;  x.rbColor ← Red;
        IF f.rbColor=Red THEN {
          -- two reds in a row, so rebalance (gg may be t.h).
          g ← Balance[gg, g, f, x];
          x ← g;
        };--IF parent is red
      };--IF both children are red
      IF c = equal THEN EXIT;
      gg ← g; g ← f; f ← x;
      x ← IF (c ← t.compareProc[insertItem, x.item]) = less THEN x.rbLLink ELSE x.rbRLink;
    ENDLOOP;
    t.h.rbRLink.rbColor ← Black; --root is always black
    IF keyAlreadyPresent THEN RETURN WITH ERROR DuplicateKey;
    t.numItems ← t.numItems + 1;
    };--Insert

  Delete: PUBLIC ENTRY PROC[t: Table, deleteKey: Item] RETURNS[deletedItem: Item] = {
    f, result, parentOfResult: Node;
    x, g, b: Node;
    z: Node = t.z;
    c: Comparison;
    result ← NIL;
    f ← t.h; x ← f.rbRLink; IF x = z THEN RETURN[NIL];
    z.item ← deleteKey; -- sentinel
    t.y.rbColor ← Black; --children of external nodes have no red to contribute to rebalancing.
    -- Inject a red node at the root if necessary.
    IF (x.rbLLink.rbColor = Black) AND (x.rbRLink.rbColor = Black) THEN
      t.h.rbRLink.rbColor ← Red;
    IF (c ← t.compareProc[deleteKey, x.item]) = equal THEN { result ← x;  parentOfResult ← f; };
    -- Search the tree for the symmetric order succecessor of the node containing key
    --(or the node itself if its RLink is z)
    DO BEGIN
      g ← f; f ← x;
      IF c = less THEN { b ← x.rbRLink;  x ← x.rbLLink; }
                   ELSE { b ← x.rbLLink;  x ← x.rbRLink; };
      -- If x is the node we're to return, save pointers to it and its parent for later.
      IF (c ← t.compareProc[deleteKey, x.item]) = equal AND x # z THEN {
        result ← x;  parentOfResult ← f
        };
      -- Note that if x is Red, no rotations happen now; this is what re-establishes the
      --properties of g and f eventually...
      IF x.rbColor=Red OR x.rbLLink.rbColor=Red OR x.rbRLink.rbColor=Red THEN LOOP;
      IF b.rbColor=Red THEN {
        -- Single rotation to move the red link b onto the search path.
        IF b = f.rbLLink THEN { f.rbLLink ← b.rbRLink;  b.rbRLink ← f; }
                        ELSE { f.rbRLink ← b.rbLLink;  b.rbLLink ← f; };
        f.rbColor ← Red;  b.rbColor ← Black;
        IF f = g.rbLLink THEN g.rbLLink ← b ELSE g.rbRLink ← b;
        -- Move back up the path to allow g and f to get re-established
        x ← b;  GOTO FixCompare;
        };--IF b.rbColor=Red
      IF x=z THEN EXIT;
      -- It is essential that this exit test be right here - after the single rotation,
      --but before the double rotations.
      x.rbColor ← Red;
      IF b.rbLLink.rbColor = Red THEN {
        b.rbLLink.rbColor ← Black;  x ← Balance[g, f, b, b.rbLLink];  GOTO FixCompare;
        };
      IF b.rbRLink.rbColor = Red THEN {
        b.rbRLink.rbColor ← Black;  x ← Balance[g, f, b, b.rbRLink];  GOTO FixCompare;
        };
      -- "Color flip"
      f.rbColor ← Black;  b.rbColor ← Red;
    EXITS
      FixCompare => c ← t.compareProc[deleteKey, x.item];
    END ENDLOOP;
    t.h.rbRLink.rbColor ← Black;
    z.rbColor ← Black;  t.y.rbColor ← Red;--undo the color mess
    z.item ← NIL;
    IF result = NIL THEN RETURN [NIL];
    -- The search has been successful; f is now the symmetric order successor of result
    --(or result itself if result has no right child), and g is its parent.  f is Red, unless
    --it is also the root.  x and b are irrelevant.
    -- Detach f from the tree.
    IF g.rbLLink = f THEN g.rbLLink ← z ELSE g.rbRLink ← z;
    -- If f is not the result node, splice f into tree in place of result.
    IF f # result THEN {
      IF parentOfResult.rbLLink = result THEN parentOfResult.rbLLink ← f
                                         ELSE parentOfResult.rbRLink ← f;
      f.rbLLink ← result.rbLLink;  f.rbRLink ← result.rbRLink;
      f.rbColor ← result.rbColor;
      };
    t.numItems ← t.numItems - 1;
    RETURN[result.item];
    };--Delete

  Lookup: PUBLIC ENTRY PROC [t: Table, lookupKey: Key] RETURNS [equalItem: Item] = {
    x: Node ← t.h.rbRLink;
    z: Node = t.z;
    c: Comparison;
    UNTIL x = z OR (c ← t.compareProc[lookupKey, x.item]) = equal DO
      IF c = less THEN x ← x.rbLLink ELSE x ← x.rbRLink;
    ENDLOOP;
    RETURN[x.item];
    };--Lookup


  Lookup3: PUBLIC ENTRY PROC [t: Table, lookupKey: Key]
   RETURNS [leftItem, equalItem, rightItem: Item] = {
    l, r: Node ← NIL;
    x: Node ← t.h.rbRLink;
    z: Node = t.z;
    c: Comparison;
    UNTIL x = z OR (c ← t.compareProc[lookupKey, x.item]) = equal DO
      IF c = less THEN { r ← x;  x ← x.rbLLink; } ELSE { l ← x;  x ← x.rbRLink; };
    ENDLOOP;
    IF x # z THEN {
      IF x.rbLLink # z THEN {
        l ← x.rbLLink;
        WHILE l.rbRLink # z DO l ← l.rbRLink ENDLOOP };
      IF x.rbRLink # z THEN {
        r ← x.rbRLink;
        WHILE r.rbLLink # z DO r ← r.rbLLink ENDLOOP };
      };
    RETURN[IF l=NIL THEN NIL ELSE l.item, x.item, IF r=NIL THEN NIL ELSE r.item];
    };--Lookup3


  EnumerateIncreasing: PUBLIC PROC[t: Table, procToApply: PROC [Item] RETURNS [BOOL]] = {
    -- Note that this proc is EXTERNAL
    x: Item;
    x ← LookupSmallest[t];
    WHILE x#NIL DO
      IF procToApply[x] THEN EXIT;
      x ← LookupNextLarger[t, x];
    ENDLOOP;
    };--EnumerateIncreasing


  LookupSmallest: PUBLIC ENTRY PROC[t: Table] RETURNS[smallestItem: Item] = {
    x: Node ← t.h.rbRLink;
    z: Node = t.z;
    IF x=z THEN RETURN[NIL];
    UNTIL x.rbLLink=z DO  x ← x.rbLLink  ENDLOOP;
    RETURN[x.item];
    };--LookupSmallest


  LookupNextLarger: PUBLIC ENTRY PROC[t: Table, lookupKey:Item]
    RETURNS[largerItem: Item] = {
    x, xl: Node;
    z: Node = t.z;
    xl ← z;  x ← t.h.rbRLink;
    UNTIL x=z DO
      IF t.compareProc[lookupKey, x.item] = less THEN
        BEGIN xl ← x; x ← x.rbLLink END
      ELSE x ← x.rbRLink;
    ENDLOOP;
    RETURN[xl.item];
    };--LookupNextLarger

  Assert: PROC[p: BOOL] = {
    IF NOT p THEN ERROR BadTable;
    };--Assert

  CheckTable: PUBLIC PROC [t: Table] = {
    --  ERRORs BadTable if the table t is not well-formed.
    z: Node = t.z;
    count: LONG INTEGER ← 0;

    Check1: PROC [x: Node, maxKey: Item] RETURNS [Item, INTEGER, BOOL] = {
      --  Returns largest key in subtree rooted at x, number of black arcs on paths to
      --external nodes from x, and whether or not this node is red.  ERRORs if it finds
      --two reds in a row, finds different distances to the external nodes (counting
      --only red arcs), or finds keys out of order.  If x is an internal node,
      --increments count.
      dl, dr: INTEGER;
      redChild: BOOL;
      IF x = z THEN BEGIN
        RETURN[maxKey, 0, FALSE];
      END;
      [maxKey, dl, redChild] ← Check1[x.rbLLink, maxKey];
      Assert[~(redChild AND (x.rbColor=Red))];
      Assert[t.compareProc[maxKey,x.item] = (IF count=0 THEN equal ELSE less)];
      count ← count + 1;
      [maxKey, dr, redChild] ← Check1[x.rbRLink, x.item];
      Assert[~(redChild AND (x.rbColor=Red))];
      Assert[dl=dr];
      RETURN[maxKey, dl+(IF x.rbColor=Black THEN 1 ELSE 0), x.rbColor=Red];
      };--Check1

    -- check structure of the table header, sentinels, etc.
    Assert[t.h.rbLLink=NIL];
    Assert[z.rbLLink=t.y];
    Assert[z.rbRLink=t.y];
    -- The following assertions may fail during intermediate states of Delete.
    --    Assert[z.rbColor=Black];
    --    Assert[y.rbColor=Red];
    --    Assert[t.h.rbRLink.rbColor=Black];
    -- check structure of table.
    IF t.numItems # 0 THEN [] ← Check1[t.h.rbRLink, LookupSmallest[t]];
    Assert[t.numItems=count];
    };--CheckTable

  RootItem: PUBLIC ENTRY PROC [t: Table] RETURNS [rootItem: Item] = {
    -- Returns NIL if tree is empty
    RETURN[t.h.rbRLink.item] };

  --  Private procedure, used by Insert (1 call), Delete (2 calls).

  Balance: INTERNAL PROC [gg, g, f, x: Node] RETURNS [Node] = {
    -- Balances a local portion of the tree.
    -- g is a child of gg (it may be either child).
    -- if g -rlink-> f -rlink-> x, or x <-llink- f <-llink- g, then only a single
    --rotation is needed; otherwise two rotations are used.
    -- when called from insert, g is black and both f and x are red.
    -- after rebalancing, the new g is black, and is the parent of both the new f and
    --the new x, which are both red.

    t: Node;
    tc: RedBlack;
    -- do first rotation if necessary
    IF f=g.rbLLink THEN {
      IF x=f.rbRLink THEN {
        f.rbRLink ← x.rbLLink;  x.rbLLink ← f;
        t ← f; f ← x; x ← t;
      };
    }
    ELSE {
      IF x=f.rbLLink THEN {
        f.rbLLink ← x.rbRLink;  x.rbRLink ← f;
        t ← f; f ← x; x ← t;
      };
    };
    -- do second rotation
    IF x=f.rbLLink THEN { g.rbLLink ← f.rbRLink;  f.rbRLink ← g; }
                   ELSE { g.rbRLink ← f.rbLLink;  f.rbLLink ← g; };
    -- update link in great grandparent
    IF g = gg.rbRLink THEN gg.rbRLink ← f ELSE gg.rbLLink ← f;
    -- do color swap (when called from Insert, g is always Black and f is always Red)
    tc ← g.rbColor;  g.rbColor ← f.rbColor;  f.rbColor ← tc;
    -- return g, new child of gg (name changed in second rotation)
    RETURN[f];
    };--Balance

END.--RedBlackTreeRefImpl

CHANGE LOG

Created by MBrown on January 18, 1980  10:58 AM
-- It worked the first time!

Changed by MBrown on January 19, 1980  8:50 PM
-- Implemented EnumerateIncreasing.  Fixed a bug in CheckTable that defeated checking of
--key order.  Made changes to allow MinusInfinity as a Key.  Changed Insert to use
--keyPresent flag, as in Guibas-Sedgewick Program 5.

Changed by MBrown on 20-Jan-80 13:28
-- Changed Balance to compare pointers rather than keys, and eliminated the code for
--"swap g and f" since only g is returned from Balance.

Changed by MBrown on March 4, 1980  2:17 PM
-- Moved inline definitions of Node structure to OrderedSymbolTable, which we now OPEN.

Changed by MBrown on April 13, 1980  4:21 PM
-- Coded Delete.  Made LookupSmallest and LookupNextLarger PUBLIC, since they may be
--useful in testing Delete.

Changed by MBrown on April 13, 1980  6:01 PM
-- Delete seemed to work the first time, a surprise.  I removed some the redundant
--checks that were there for debugging; the old version will be called
--RedBlackCheckedImpl.

Changed by MBrown on May 1, 1980  10:03 PM
-- Changed Delete to avoid a search from the root to find the parent of the result node.
--This involved actually understanding the algorithm; "cleaning up" the way rebalancing
--is done introduced several bugs.

Changed by MBrown on May 7, 1980  6:36 PM
-- Added DestroyTable and Size.  We now allocate/free TableObjects using procs in the
--OrderedSymbolTable interface.  Renamed module.

Changed by MBrown on August 11, 1980  10:17 PM
-- Major revisions (prompted by first real client, the Juniper locks code, which will
--make further changes for its special needs).  We take the key comparison function from
--the interface, instead of requiring "<" to be applicable.  This means that comparisons
--may be expensive, so we save the result in a local variable to avoid doing twice the
--required number.  MinusInfinity is no longer treated specially in the code.  We do
--explicit field selections for the links and color fields, instead of using inline
--procs from the interface.  Added Lookup3 proc for Juniper locks application.  We now
--pass in ALL storage, including storage for table objects.  y and z now point to nodes
--in the global frame of this module, rather than being replicated for each table; thus
--it is now unsafe for multiple processes to be inside of an instance of this module,
--even if they operate on distinct tables (it was always unsafe to perform concurrent
--operations on a single table).

Changed by MBrown on October 29, 1980  8:42 PM
-- Eliminated all reference to MinusInfinity (it was being used in CheckTable).

Changed by MBrown on January 7, 1981  9:47 PM
-- Changes for collectible storage, module monitor.

Changed by MBrown on 12-Apr-81 11:42:11
-- Created this version for use in library without recompilation.  Table is now an object monitor.

Changed by MBrown on 20-Aug-81 15:40:55
-- Bug fix: Insert did not detect duplicate key if it was the node above z in search.  Need to improve test
--program to exercise this case.

Changed by MBrown on June 28, 1982 11:31 am
-- CEDAR implementation; added DeleteAllItems.  TRUSTED around FREE due to compiler bug.

Changed by MBrown on August 27, 1982 4:53 pm
-- Use Environment.Comparison.

Changed by MBrown on November 17, 1982 10:50 am
-- In Insert, RETURN WITH ERROR DuplicateKey.

Changed by MBrown on January 31, 1983 10:57 am
-- In Lookup and Lookup3, flushed use of z as a sentinel (since it conflicts with the published
--restrictions on CompareProcs).

Changed by MBrown on March 2, 1983 6:43 pm
-- In Delete, fix bug when result = NIL (used to blindly return result.item).