-- File: RedBlackTreeImpl.mesa
-- Last edited by MBrown on October 20, 1983 11:37 pm


--      Notes on the data structure

--  The color of a node should be thought of as the color of the edge coming into the
--node. The general color invariants for red-black trees are that (1) all external nodes
--are black, while internals are red or black, and (2) all paths from an internal node
--to an external node contain the same number of black arcs.

--  The tree edges of a 2-3-4 tree become black arcs in the binarized red-black
--representation, while the internal 2-, 3-, and 4-nodes are represented by connected
--red subtrees: the degenerate one with one node, both of the possible two node trees,
--and the completely balanced tree on three nodes.  Another way of stating this
--invariant is to say that no path contains two consecutive red arcs.

--  The top-down insertion algorithm below is from Program 5 of "A Dichromatic Framework
--for Balanced Trees" by Leo Guibas and Bob Sedgewick.  The deletion algorithm
--incorporates several changes to the algorithm in that paper.  This program was
--originally adapted from Dave Gifford's StringStorageImpl.Mesa.

  DIRECTORY
    Basics USING [Comparison],
    OrderedSymbolTable USING[
      Node, Key, GetKey, Compare, Table, ErrorType];

RedBlackTreeImpl: CEDAR MONITOR
  IMPORTS OrderedSymbolTable
  EXPORTS OrderedSymbolTable
  SHARES OrderedSymbolTable
  = BEGIN OPEN OrderedSymbolTable;
  Comparison: TYPE = Basics.Comparison;

  RedBlack: TYPE = BOOL;
  Red: BOOL = TRUE;
  Black: BOOL = FALSE;

  DuplicateKey: PUBLIC ERROR = CODE;
  Error: PUBLIC ERROR [ec: ErrorType] = CODE;

  doChecking: BOOLEAN = TRUE;
    -- forces extra validity checking of inputs.

  -- Shared variables

  y: Node;
  z: Node ← NIL;

  -- Procedures exported to OrderedSymbolTable

  Initialize: PUBLIC ENTRY PROC [sentinel1, sentinel2: Node] = {
    sentinel1.rbColor ← Red; sentinel1.rbLLink ← sentinel1.rbRLink ← NIL;
    sentinel2.rbColor ← Black; sentinel2.rbLLink ← sentinel2.rbRLink ← sentinel1;
    y ← sentinel1; z ← sentinel2 };

  CreateTable: PUBLIC ENTRY PROC[header: Node] RETURNS[Table] = {
    IF z = NIL THEN ERROR Error[notInitialized];
    header.rbColor ← Black; header.rbLLink ← NIL; header.rbRLink ← z;
    RETURN [[header]] };

  Insert: PUBLIC ENTRY PROC[self: Table, nodeToInsert: Node, insertKey: Key] = {
    -- Requires self.rbColor = self.rbRLink.rbColor = z.rbColor = Black, y.rbColor = Red.
    x, gg, g, f: Node;
    keyAlreadyPresent: BOOLEAN ← TRUE;
    c: Comparison ← greater; --since actual tree sits in right subtree of header node...
    -- search the tree, rebalancing on the way down.
    IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
    f ← self; x ← f.rbRLink;
    DO
      IF (x.rbLLink.rbColor=Red) AND (x.rbRLink.rbColor=Red) THEN {
        IF x=z THEN {
          -- an external node has been found; check to be sure that a duplicate key is not present.
          IF c = equal THEN EXIT;
          -- perform the insertion.
          x ← nodeToInsert;  keyAlreadyPresent ← FALSE;
          x.rbLLink ← z;  x.rbRLink ← z;
          IF c = less THEN f.rbLLink ← x ELSE f.rbRLink ← x;
          c ← equal;
        };--IF x is external
        -- do color flip.
        x.rbLLink.rbColor ← Black;  x.rbRLink.rbColor ← Black;  x.rbColor ← Red;
        IF f.rbColor=Red THEN {
          -- two reds in a row, so rebalance (gg may be self).
          g ← Balance[gg, g, f, x];
          x ← g;
        };--IF parent is red
      };--IF both children are red
      IF c = equal THEN EXIT;
      gg ← g; g ← f; f ← x;
      x ← IF (c ← Compare[insertKey, x]) = less THEN x.rbLLink ELSE x.rbRLink;
      ENDLOOP;
    self.rbRLink.rbColor ← Black; --root is always black
    IF keyAlreadyPresent THEN RETURN WITH ERROR DuplicateKey;
    };--Insert

  Delete: PUBLIC ENTRY PROC[self: Table, deleteKey: Key] RETURNS [deletedNode: Node] = {
    f, result, parentOfResult: Node;
    x, g, b: Node;
    c: Comparison;
    result ← NIL;
    IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
    f ← self;  x ← f.rbRLink;
    IF x = z THEN RETURN[NIL];
    y.rbColor ← Black; --children of external nodes have no red to contribute to rebalancing.
    -- Inject a red node at the root if necessary.
    IF (x.rbLLink.rbColor = Black) AND (x.rbRLink.rbColor = Black) THEN x.rbColor ← Red;
    IF (c ← Compare[deleteKey, x]) = equal THEN { result ← x;  parentOfResult ← f; };
    -- Search the tree for the symmetric order succecessor of the node containing key
    --(or the node itself if its RLink is z)
    DO BEGIN
      g ← f; f ← x;
      IF c = less THEN { b ← x.rbRLink;  x ← x.rbLLink; }
                   ELSE { b ← x.rbLLink;  x ← x.rbRLink; };
      -- If x is the node we're to return, save pointers to it and its parent for later.
      IF (c ← Compare[deleteKey, x]) = equal AND x # z THEN { result ← x;  parentOfResult ← f; };
      -- Note that if x is Red, no rotations happen now; this is what re-establishes the
      --properties of g and f eventually...
      IF x.rbColor=Red OR x.rbLLink.rbColor=Red OR x.rbRLink.rbColor=Red THEN LOOP;
      IF b.rbColor=Red THEN {
        -- Single rotation to move the red link b onto the search path.
        IF b = f.rbLLink THEN { f.rbLLink ← b.rbRLink;  b.rbRLink ← f; }
                        ELSE { f.rbRLink ← b.rbLLink;  b.rbLLink ← f; };
        f.rbColor ← Red;  b.rbColor ← Black;
        IF f = g.rbLLink THEN g.rbLLink ← b ELSE g.rbRLink ← b;
        -- Move back up the path to allow g and f to get re-established
        x ← b;  GOTO FixCompare;
      };--IF b.rbColor=Red
      IF x=z THEN EXIT;
      -- It is essential that this exit test be right here - after the single rotation,
      --but before the double rotations.
      x.rbColor ← Red;
      IF b.rbLLink.rbColor = Red THEN {
        b.rbLLink.rbColor ← Black;  x ← Balance[g, f, b, b.rbLLink];  GOTO FixCompare;
      };
      IF b.rbRLink.rbColor = Red THEN {
        b.rbRLink.rbColor ← Black;  x ← Balance[g, f, b, b.rbRLink];  GOTO FixCompare;
      };
      -- "Color flip"
      f.rbColor ← Black;  b.rbColor ← Red;
    EXITS
      FixCompare => c ← Compare[deleteKey, x];
    END ENDLOOP;
    self.rbRLink.rbColor ← Black;
    z.rbColor ← Black;  y.rbColor ← Red;--undo the color mess
    -- If search has been successful, f is now the symmetric order successor of result
    --(or result itself if result.rbRLink = z), and g is its parent.  f is Red, unless
    --it is also the root.  x and b are irrelevant.
    IF result # NIL THEN {
      -- detach f from the tree. 
      IF g.rbLLink = f THEN g.rbLLink ← z ELSE g.rbRLink ← z;
      -- if f is not the result node, splice f into tree in place of result.
      IF f # result THEN {
        IF parentOfResult.rbLLink = result THEN parentOfResult.rbLLink ← f
                                           ELSE parentOfResult.rbRLink ← f;
        f.rbLLink ← result.rbLLink;  f.rbRLink ← result.rbRLink;
        f.rbColor ← result.rbColor;
      };--IF f # result
    };--IF result # NIL
    RETURN[result];
    };--Delete

  Lookup: PUBLIC ENTRY PROC[self: Table, lookupKey: Key] RETURNS [equalNode: Node] = {
    c: Comparison;
    IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
    equalNode ← self.rbRLink;
    DO
      IF equalNode = z THEN RETURN [NIL];
      IF (c ← Compare[lookupKey, equalNode]) = equal THEN RETURN [equalNode];
      IF c = less THEN equalNode ← equalNode.rbLLink ELSE equalNode ← equalNode.rbRLink;
      ENDLOOP };

  Lookup3: PUBLIC ENTRY PROC[self: Table, lookupKey: Key]
   RETURNS[leftNode, equalNode, rightNode: Node] = {
    c: Comparison;
    IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
    equalNode ← self.rbRLink;
    DO
      IF equalNode = z THEN RETURN[leftNode, NIL, rightNode];
      IF (c ← Compare[lookupKey, equalNode]) = equal THEN EXIT;
      IF c = less THEN { rightNode ← equalNode;  equalNode ← equalNode.rbLLink }
        ELSE { leftNode ← equalNode;  equalNode ← equalNode.rbRLink; };
      ENDLOOP;
    IF equalNode.rbLLink # z THEN {
      leftNode ← equalNode.rbLLink;
      WHILE leftNode.rbRLink # z DO leftNode ← leftNode.rbRLink ENDLOOP };
    IF equalNode.rbRLink # z THEN {
      rightNode ← equalNode.rbRLink;
      WHILE rightNode.rbLLink # z DO rightNode ← rightNode.rbLLink ENDLOOP };
    RETURN[leftNode, equalNode, rightNode] };

  LookupSmallest: PUBLIC ENTRY PROC[self: Table] RETURNS[smallestNode: Node] = {
    IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
    smallestNode ← self.rbRLink;
    IF smallestNode=z THEN RETURN[NIL];
    UNTIL smallestNode.rbLLink=z DO  smallestNode ← smallestNode.rbLLink  ENDLOOP;
    RETURN[smallestNode] };

  LookupNextLarger: PUBLIC ENTRY PROC[self: Table, lookupKey: Key]
    RETURNS[largerNode: Node] = {
    x: Node ← self.rbRLink;
    IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
    largerNode ← NIL;
    UNTIL x=z DO
      IF Compare[lookupKey, x] = less THEN { largerNode ← x; x ← x.rbLLink }
        ELSE x ← x.rbRLink;
      ENDLOOP;
    RETURN[largerNode] };

  LookupLargest: PUBLIC ENTRY PROC[self: Table] RETURNS[largestNode: Node] = {
    IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
    largestNode ← self.rbRLink;
    IF largestNode=z THEN RETURN[NIL];
    UNTIL largestNode.rbRLink=z DO  largestNode ← largestNode.rbRLink  ENDLOOP;
    RETURN[largestNode] };

  LookupNextSmaller: PUBLIC ENTRY PROC [self: Table, lookupKey: Key]
    RETURNS[smallerNode: Node] = {
    x: Node ← self.rbRLink;
    IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
    smallerNode ← NIL;
    UNTIL x=z DO
      IF Compare[lookupKey, x] = greater THEN { smallerNode ← x; x ← x.rbRLink }
        ELSE x ← x.rbLLink;
      ENDLOOP;
    RETURN[smallerNode] };

  DestroyTable: PUBLIC--EXTERNAL--PROC [self: Table] = {
    IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
    [] ← CreateTable[self] };

  EnumerateIncreasing: PUBLIC ENTRY PROC [
    self: Table, procToApply: PROC [Node] RETURNS [--stop--BOOLEAN]] = {
    -- quick and dirty recursive implementation, has overhead of one procedure call
    --per call to procToApply.
    VisitSubtree: INTERNAL PROC [root: Node] RETURNS [--stop--BOOLEAN] = {
      -- never called with root = z; this saves half of the procedure calls.
      IF root.rbLLink # z AND VisitSubtree[root.rbLLink] THEN RETURN [TRUE];
      IF procToApply[root] THEN RETURN [TRUE];
      IF root.rbRLink # z THEN RETURN [VisitSubtree[root.rbRLink]] ELSE RETURN [FALSE] };
    IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
    IF self.rbRLink = z THEN RETURN;
    [] ← VisitSubtree [self.rbRLink] };

  Assert: PROC[p: BOOLEAN] = {
    IF NOT p THEN ERROR Error[badTable] };

  CheckTable: PUBLIC PROC [self: Table] = {
    --  ERRORs Error[badTable] if the table t is not well-formed.

    Check1: PROC[x: Node, maxKey: Key] RETURNS[Key, INTEGER, BOOLEAN] = {
      --  Returns largest key in subtree rooted at x, number of black arcs on paths to
      --external nodes from x, and whether or not this node is red.  ERRORs if it finds
      --two reds in a row, finds different distances to the external nodes (counting
      --only red arcs), or finds keys out of order.  If x is an internal node,
      --increments count.
      dl, dr: INTEGER;
      redChild: BOOLEAN;
      IF x = z THEN BEGIN
        RETURN[maxKey, 0, FALSE];
      END;
      [maxKey, dl, redChild] ← Check1[x.rbLLink, maxKey];
      Assert[~(redChild AND (x.rbColor=Red))];
      Assert[Compare[maxKey,x] = (IF count=0 THEN equal ELSE less)];
      count ← count + 1;
      [maxKey, dr, redChild] ← Check1[x.rbRLink, GetKey[x]];
      Assert[~(redChild AND (x.rbColor=Red))];
      Assert[dl=dr];
      RETURN[maxKey, dl+(IF x.rbColor=Black THEN 1 ELSE 0), x.rbColor=Red];
      };--Check1

    count: LONG INTEGER ← 0;
    -- check structure of the table header, sentinels, etc.
    Assert[self.rbLLink=NIL];
    Assert[z.rbLLink=y];
    Assert[z.rbRLink=y];
    -- The following assertions may fail during intermediate states of Delete.
    --    Assert[z.rbColor=Black];
    --    Assert[y.rbColor=Red];
    --    Assert[self.rbRLink.rbColor=Black];
    -- check structure of table.
    IF self.rbRLink # z THEN [] ← Check1[self.rbRLink, GetKey[LookupSmallest[self]]] };

  RootNode: PUBLIC ENTRY PROC [self: Table] RETURNS [rootNode: Node] = {
    IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
    RETURN [IF self.rbRLink = z THEN NIL ELSE self.rbRLink] };


  --  Private procedure, used by Insert (1 call), Delete (2 calls).

  Balance: INTERNAL PROC [gg, g, f, x: Node] RETURNS [Node] = {
    -- Balances a local portion of the tree.
    -- g is a child of gg (it may be either child).
    -- if g -rlink-> f -rlink-> x, or x <-llink- f <-llink- g, then only a single
    --rotation is needed; otherwise two rotations are used.
    -- when called from insert, g is black and both f and x are red.
    -- after rebalancing, the new g is black, and is the parent of both the new f and
    --the new x, which are both red.

    t: Node;
    tc: RedBlack;
    -- do first rotation if necessary
    IF f=g.rbLLink THEN {
      IF x=f.rbRLink THEN {
        f.rbRLink ← x.rbLLink;  x.rbLLink ← f;
        t ← f; f ← x; x ← t;
      };
    }
    ELSE {
      IF x=f.rbLLink THEN {
        f.rbLLink ← x.rbRLink;  x.rbRLink ← f;
        t ← f; f ← x; x ← t;
      };
    };
    -- do second rotation
    IF x=f.rbLLink THEN { g.rbLLink ← f.rbRLink;  f.rbRLink ← g; }
                   ELSE { g.rbRLink ← f.rbLLink;  f.rbLLink ← g; };
    -- update link in great grandparent
    IF g = gg.rbRLink THEN gg.rbRLink ← f ELSE gg.rbLLink ← f;
    -- do color swap (when called from Insert, g is always Black and f is always Red)
    tc ← g.rbColor;  g.rbColor ← f.rbColor;  f.rbColor ← tc;
    -- return g, new child of gg (name changed in second rotation)
    RETURN[f] };

END.--RedBlackTreeImpl

CHANGE LOG

Created by MBrown on January 18, 1980  10:58 AM
-- It worked the first time!

Changed by MBrown on January 19, 1980  8:50 PM
-- Implemented EnumerateIncreasing.  Fixed a bug in CheckTable that defeated checking of
--key order.  Made changes to allow MinusInfinity as a Key.  Changed Insert to use
--keyPresent flag, as in Guibas-Sedgewick Program 5.

Changed by MBrown on 20-Jan-80 13:28
-- Changed Balance to compare pointers rather than keys, and eliminated the code for
--"swap g and f" since only g is returned from Balance.

Changed by MBrown on March 4, 1980  2:17 PM
-- Moved inline definitions of Node structure to OrderedSymbolTable, which we now OPEN.

Changed by MBrown on April 13, 1980  4:21 PM
-- Coded Delete.  Made LookupSmallest and LookupNextLarger PUBLIC, since they may be
--useful in testing Delete.

Changed by MBrown on April 13, 1980  6:01 PM
-- Delete seemed to work the first time, a surprise.  I removed some the redundant
--checks that were there for debugging; the old version will be called
--RedBlackCheckedImpl.

Changed by MBrown on May 1, 1980  10:03 PM
-- Changed Delete to avoid a search from the root to find the parent of the result node.
--This involved actually understanding the algorithm; "cleaning up" the way rebalancing
--is done introduced several bugs.

Changed by MBrown on May 7, 1980  6:36 PM
-- Added DestroyTable and Size.  We now allocate/free TableObjects using procs in the
--OrderedSymbolTable interface.  Renamed module.

Changed by MBrown on August 11, 1980  10:17 PM
-- Major revisions (prompted by first real client, the Juniper locks code, which will
--make further changes for its special needs).  We take the key comparison function from
--the interface, instead of requiring "<" to be applicable.  This means that comparisons
--may be expensive, so we save the result in a local variable to avoid doing twice the
--required number.  MinusInfinity is no longer treated specially in the code.  We do
--explicit field selections for the links and color fields, instead of using inline
--procs from the interface.  Added Lookup3 proc for Juniper locks application.  We now
--pass in ALL storage, including storage for table objects.  y and z now point to nodes
--in the global frame of this module, rather than being replicated for each table; thus
--it is now unsafe for multiple processes to be inside of an instance of this module,
--even if they operate on distinct tables (it was always unsafe to perform concurrent
--operations on a single table).

Changed by MBrown on October 29, 1980  8:42 PM
-- Eliminated all reference to MinusInfinity (it was being used in CheckTable).

Changed by MBrown on January 7, 1981  9:47 PM
-- Changes for collectible storage, module monitor.

Changed by MBrown on March 1, 1982 3:30 pm
-- New version for use in Alpine.  No allocation done in package, even for sentinels.  No
--more "AssignKey[,]", which was used only for sentinels; instead, check for z explicitly. No
--more "Key[]", and Compare now takes a Key and a Node.  A Table is just a Node; no count
--of number of nods in a table is kept in the table.
-- Other variants that are possible (but hopefully are not required):
-- (1) use < and = instead of 3-way compares.
-- (2) use object monitor (requires replicating z and y per-table), or no monitor.
-- (3) Table and Node are different types (this is required for object monitor, to hold y and z.)

Changed by MBrown on March 2, 1982 4:20 pm
-- Table is a RECORD [Node].  Clean up errors in interface.

Changed by MBrown on June 28, 1982 11:12 am
-- CEDAR implementation.

Changed by MBrown on August 26, 1982 11:11 pm
-- Use Environment.Comparison.

Changed by MBrown on November 10, 1982 6:34 pm
-- DuplicateKey is now an ERROR.

Changed by MBrown on October 20, 1983 11:38 pm
-- Conversion to Cedar 5.0.