-- File: RedBlackTreeImpl.mesa
-- Last edited by MBrown on November 10, 1982 6:33 pm
-- Notes on the data structure
-- The color of a node should be thought of as the color of the edge coming into the
--node. The general color invariants for red-black trees are that (1) all external nodes
--are black, while internals are red or black, and (2) all paths from an internal node
--to an external node contain the same number of black arcs.
-- The tree edges of a 2-3-4 tree become black arcs in the binarized red-black
--representation, while the internal 2-, 3-, and 4-nodes are represented by connected
--red subtrees: the degenerate one with one node, both of the possible two node trees,
--and the completely balanced tree on three nodes. Another way of stating this
--invariant is to say that no path contains two consecutive red arcs.
-- The top-down insertion algorithm below is from Program 5 of "A Dichromatic Framework
--for Balanced Trees" by Leo Guibas and Bob Sedgewick. The deletion algorithm
--incorporates several changes to the algorithm in that paper. This program was
--originally adapted from Dave Gifford's StringStorageImpl.Mesa.
DIRECTORY
Environment USING [Comparison],
OrderedSymbolTable USING[
Node, Key, GetKey, Compare, Table, ErrorType];
RedBlackTreeImpl: CEDAR MONITOR
IMPORTS OrderedSymbolTable
EXPORTS OrderedSymbolTable
SHARES OrderedSymbolTable
= BEGIN OPEN OrderedSymbolTable;
Comparison: TYPE = Environment.Comparison;
RedBlack: TYPE = BOOL;
Red: BOOLEAN = TRUE;
Black: BOOLEAN = FALSE;
DuplicateKey: PUBLIC ERROR = CODE;
Error: PUBLIC ERROR [ec: ErrorType] = CODE;
doChecking: BOOLEAN = TRUE;
-- forces extra validity checking of inputs.
-- Shared variables
y: Node;
z: Node ← NIL;
-- Procedures exported to OrderedSymbolTable
Initialize: PUBLIC ENTRY PROC [sentinel1, sentinel2: Node] = {
sentinel1.rbColor ← Red; sentinel1.rbLLink ← sentinel1.rbRLink ← NIL;
sentinel2.rbColor ← Black; sentinel2.rbLLink ← sentinel2.rbRLink ← sentinel1;
y ← sentinel1; z ← sentinel2 };
CreateTable: PUBLIC ENTRY PROC[header: Node] RETURNS[Table] = {
IF z = NIL THEN ERROR Error[notInitialized];
header.rbColor ← Black; header.rbLLink ← NIL; header.rbRLink ← z;
RETURN [[header]] };
Insert: PUBLIC ENTRY PROC[self: Table, nodeToInsert: Node, insertKey: Key] = {
-- Requires self.rbColor = self.rbRLink.rbColor = z.rbColor = Black, y.rbColor = Red.
x, gg, g, f: Node;
keyAlreadyPresent: BOOLEAN ← TRUE;
c: Comparison ← greater; --since actual tree sits in right subtree of header node...
-- search the tree, rebalancing on the way down.
IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
f ← self; x ← f.rbRLink;
DO
IF (x.rbLLink.rbColor=Red) AND (x.rbRLink.rbColor=Red) THEN {
IF x=z THEN {
-- an external node has been found; check to be sure that a duplicate key is not present.
IF c = equal THEN EXIT;
-- perform the insertion.
x ← nodeToInsert; keyAlreadyPresent ← FALSE;
x.rbLLink ← z; x.rbRLink ← z;
IF c = less THEN f.rbLLink ← x ELSE f.rbRLink ← x;
c ← equal;
};--IF x is external
-- do color flip.
x.rbLLink.rbColor ← Black; x.rbRLink.rbColor ← Black; x.rbColor ← Red;
IF f.rbColor=Red THEN {
-- two reds in a row, so rebalance (gg may be self).
g ← Balance[gg, g, f, x];
x ← g;
};--IF parent is red
};--IF both children are red
IF c = equal THEN EXIT;
gg ← g; g ← f; f ← x;
x ← IF (c ← Compare[insertKey, x]) = less THEN x.rbLLink ELSE x.rbRLink;
ENDLOOP;
self.rbRLink.rbColor ← Black; --root is always black
IF keyAlreadyPresent THEN RETURN WITH ERROR DuplicateKey;
};--Insert
Delete: PUBLIC ENTRY PROC[self: Table, deleteKey: Key] RETURNS [deletedNode: Node] = {
f, result, parentOfResult: Node;
x, g, b: Node;
c: Comparison;
result ← NIL;
IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
f ← self; x ← f.rbRLink;
IF x = z THEN RETURN[NIL];
y.rbColor ← Black; --children of external nodes have no red to contribute to rebalancing.
-- Inject a red node at the root if necessary.
IF (x.rbLLink.rbColor = Black) AND (x.rbRLink.rbColor = Black) THEN x.rbColor ← Red;
IF (c ← Compare[deleteKey, x]) = equal THEN { result ← x; parentOfResult ← f; };
-- Search the tree for the symmetric order succecessor of the node containing key
--(or the node itself if its RLink is z)
DO BEGIN
g ← f; f ← x;
IF c = less THEN { b ← x.rbRLink; x ← x.rbLLink; }
ELSE { b ← x.rbLLink; x ← x.rbRLink; };
-- If x is the node we're to return, save pointers to it and its parent for later.
IF (c ← Compare[deleteKey, x]) = equal AND x # z THEN { result ← x; parentOfResult ← f; };
-- Note that if x is Red, no rotations happen now; this is what re-establishes the
--properties of g and f eventually...
IF x.rbColor=Red OR x.rbLLink.rbColor=Red OR x.rbRLink.rbColor=Red THEN LOOP;
IF b.rbColor=Red THEN {
-- Single rotation to move the red link b onto the search path.
IF b = f.rbLLink THEN { f.rbLLink ← b.rbRLink; b.rbRLink ← f; }
ELSE { f.rbRLink ← b.rbLLink; b.rbLLink ← f; };
f.rbColor ← Red; b.rbColor ← Black;
IF f = g.rbLLink THEN g.rbLLink ← b ELSE g.rbRLink ← b;
-- Move back up the path to allow g and f to get re-established
x ← b; GOTO FixCompare;
};--IF b.rbColor=Red
IF x=z THEN EXIT;
-- It is essential that this exit test be right here - after the single rotation,
--but before the double rotations.
x.rbColor ← Red;
IF b.rbLLink.rbColor = Red THEN {
b.rbLLink.rbColor ← Black; x ← Balance[g, f, b, b.rbLLink]; GOTO FixCompare;
};
IF b.rbRLink.rbColor = Red THEN {
b.rbRLink.rbColor ← Black; x ← Balance[g, f, b, b.rbRLink]; GOTO FixCompare;
};
-- "Color flip"
f.rbColor ← Black; b.rbColor ← Red;
EXITS
FixCompare => c ← Compare[deleteKey, x];
END ENDLOOP;
self.rbRLink.rbColor ← Black;
z.rbColor ← Black; y.rbColor ← Red;--undo the color mess
-- If search has been successful, f is now the symmetric order successor of result
--(or result itself if result.rbRLink = z), and g is its parent. f is Red, unless
--it is also the root. x and b are irrelevant.
IF result # NIL THEN {
-- detach f from the tree.
IF g.rbLLink = f THEN g.rbLLink ← z ELSE g.rbRLink ← z;
-- if f is not the result node, splice f into tree in place of result.
IF f # result THEN {
IF parentOfResult.rbLLink = result THEN parentOfResult.rbLLink ← f
ELSE parentOfResult.rbRLink ← f;
f.rbLLink ← result.rbLLink; f.rbRLink ← result.rbRLink;
f.rbColor ← result.rbColor;
};--IF f # result
};--IF result # NIL
RETURN[result];
};--Delete
Lookup: PUBLIC ENTRY PROC[self: Table, lookupKey: Key] RETURNS [equalNode: Node] = {
c: Comparison;
IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
equalNode ← self.rbRLink;
DO
IF equalNode = z THEN RETURN [NIL];
IF (c ← Compare[lookupKey, equalNode]) = equal THEN RETURN [equalNode];
IF c = less THEN equalNode ← equalNode.rbLLink ELSE equalNode ← equalNode.rbRLink;
ENDLOOP };
Lookup3: PUBLIC ENTRY PROC[self: Table, lookupKey: Key]
RETURNS[leftNode, equalNode, rightNode: Node] = {
c: Comparison;
IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
equalNode ← self.rbRLink;
DO
IF equalNode = z THEN RETURN[leftNode, NIL, rightNode];
IF (c ← Compare[lookupKey, equalNode]) = equal THEN EXIT;
IF c = less THEN { rightNode ← equalNode; equalNode ← equalNode.rbLLink }
ELSE { leftNode ← equalNode; equalNode ← equalNode.rbRLink; };
ENDLOOP;
IF equalNode.rbLLink # z THEN {
leftNode ← equalNode.rbLLink;
WHILE leftNode.rbRLink # z DO leftNode ← leftNode.rbRLink ENDLOOP };
IF equalNode.rbRLink # z THEN {
rightNode ← equalNode.rbRLink;
WHILE rightNode.rbLLink # z DO rightNode ← rightNode.rbLLink ENDLOOP };
RETURN[leftNode, equalNode, rightNode] };
LookupSmallest: PUBLIC ENTRY PROC[self: Table] RETURNS[smallestNode: Node] = {
IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
smallestNode ← self.rbRLink;
IF smallestNode=z THEN RETURN[NIL];
UNTIL smallestNode.rbLLink=z DO smallestNode ← smallestNode.rbLLink ENDLOOP;
RETURN[smallestNode] };
LookupNextLarger: PUBLIC ENTRY PROC[self: Table, lookupKey: Key]
RETURNS[largerNode: Node] = {
x: Node ← self.rbRLink;
IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
largerNode ← NIL;
UNTIL x=z DO
IF Compare[lookupKey, x] = less THEN { largerNode ← x; x ← x.rbLLink }
ELSE x ← x.rbRLink;
ENDLOOP;
RETURN[largerNode] };
LookupLargest: PUBLIC ENTRY PROC[self: Table] RETURNS[largestNode: Node] = {
IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
largestNode ← self.rbRLink;
IF largestNode=z THEN RETURN[NIL];
UNTIL largestNode.rbRLink=z DO largestNode ← largestNode.rbRLink ENDLOOP;
RETURN[largestNode] };
LookupNextSmaller: PUBLIC ENTRY PROC [self: Table, lookupKey: Key]
RETURNS[smallerNode: Node] = {
x: Node ← self.rbRLink;
IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
smallerNode ← NIL;
UNTIL x=z DO
IF Compare[lookupKey, x] = greater THEN { smallerNode ← x; x ← x.rbRLink }
ELSE x ← x.rbLLink;
ENDLOOP;
RETURN[smallerNode] };
DestroyTable: PUBLIC--EXTERNAL--PROC [self: Table] = {
IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
[] ← CreateTable[self] };
EnumerateIncreasing: PUBLIC ENTRY PROC [
self: Table, procToApply: PROC [Node] RETURNS [--stop--BOOLEAN]] = {
-- quick and dirty recursive implementation, has overhead of one procedure call
--per call to procToApply.
VisitSubtree: INTERNAL PROC [root: Node] RETURNS [--stop--BOOLEAN] = {
-- never called with root = z; this saves half of the procedure calls.
IF root.rbLLink # z AND VisitSubtree[root.rbLLink] THEN RETURN [TRUE];
IF procToApply[root] THEN RETURN [TRUE];
IF root.rbRLink # z THEN RETURN [VisitSubtree[root.rbRLink]] ELSE RETURN [FALSE] };
IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
IF self.rbRLink = z THEN RETURN;
[] ← VisitSubtree [self.rbRLink] };
Assert: PROC[p: BOOLEAN] = {
IF NOT p THEN ERROR Error[badTable] };
CheckTable: PUBLIC PROC [self: Table] = {
-- ERRORs Error[badTable] if the table t is not well-formed.
Check1: PROC[x: Node, maxKey: Key] RETURNS[Key, INTEGER, BOOLEAN] = {
-- Returns largest key in subtree rooted at x, number of black arcs on paths to
--external nodes from x, and whether or not this node is red. ERRORs if it finds
--two reds in a row, finds different distances to the external nodes (counting
--only red arcs), or finds keys out of order. If x is an internal node,
--increments count.
dl, dr: INTEGER;
redChild: BOOLEAN;
IF x = z THEN BEGIN
RETURN[maxKey, 0, FALSE];
END;
[maxKey, dl, redChild] ← Check1[x.rbLLink, maxKey];
Assert[~(redChild AND (x.rbColor=Red))];
Assert[Compare[maxKey,x] = (IF count=0 THEN equal ELSE less)];
count ← count + 1;
[maxKey, dr, redChild] ← Check1[x.rbRLink, GetKey[x]];
Assert[~(redChild AND (x.rbColor=Red))];
Assert[dl=dr];
RETURN[maxKey, dl+(IF x.rbColor=Black THEN 1 ELSE 0), x.rbColor=Red];
};--Check1
count: LONG INTEGER ← 0;
-- check structure of the table header, sentinels, etc.
Assert[self.rbLLink=NIL];
Assert[z.rbLLink=y];
Assert[z.rbRLink=y];
-- The following assertions may fail during intermediate states of Delete.
-- Assert[z.rbColor=Black];
-- Assert[y.rbColor=Red];
-- Assert[self.rbRLink.rbColor=Black];
-- check structure of table.
IF self.rbRLink # z THEN [] ← Check1[self.rbRLink, GetKey[LookupSmallest[self]]] };
RootNode: PUBLIC ENTRY PROC [self: Table] RETURNS [rootNode: Node] = {
IF doChecking AND self.rbLLink # NIL THEN ERROR Error[badTable];
RETURN [IF self.rbRLink = z THEN NIL ELSE self.rbRLink] };
-- Private procedure, used by Insert (1 call), Delete (2 calls).
Balance: INTERNAL PROC [gg, g, f, x: Node] RETURNS [Node] = {
-- Balances a local portion of the tree.
-- g is a child of gg (it may be either child).
-- if g -rlink-> f -rlink-> x, or x <-llink- f <-llink- g, then only a single
--rotation is needed; otherwise two rotations are used.
-- when called from insert, g is black and both f and x are red.
-- after rebalancing, the new g is black, and is the parent of both the new f and
--the new x, which are both red.
t: Node;
tc: RedBlack;
-- do first rotation if necessary
IF f=g.rbLLink THEN {
IF x=f.rbRLink THEN {
f.rbRLink ← x.rbLLink; x.rbLLink ← f;
t ← f; f ← x; x ← t;
};
}
ELSE {
IF x=f.rbLLink THEN {
f.rbLLink ← x.rbRLink; x.rbRLink ← f;
t ← f; f ← x; x ← t;
};
};
-- do second rotation
IF x=f.rbLLink THEN { g.rbLLink ← f.rbRLink; f.rbRLink ← g; }
ELSE { g.rbRLink ← f.rbLLink; f.rbLLink ← g; };
-- update link in great grandparent
IF g = gg.rbRLink THEN gg.rbRLink ← f ELSE gg.rbLLink ← f;
-- do color swap (when called from Insert, g is always Black and f is always Red)
tc ← g.rbColor; g.rbColor ← f.rbColor; f.rbColor ← tc;
-- return g, new child of gg (name changed in second rotation)
RETURN[f] };
END.--RedBlackTreeImpl
CHANGE LOG
Created by MBrown on January 18, 1980 10:58 AM
-- It worked the first time!
Changed by MBrown on January 19, 1980 8:50 PM
-- Implemented EnumerateIncreasing. Fixed a bug in CheckTable that defeated checking of
--key order. Made changes to allow MinusInfinity as a Key. Changed Insert to use
--keyPresent flag, as in Guibas-Sedgewick Program 5.
Changed by MBrown on 20-Jan-80 13:28
-- Changed Balance to compare pointers rather than keys, and eliminated the code for
--"swap g and f" since only g is returned from Balance.
Changed by MBrown on March 4, 1980 2:17 PM
-- Moved inline definitions of Node structure to OrderedSymbolTable, which we now OPEN.
Changed by MBrown on April 13, 1980 4:21 PM
-- Coded Delete. Made LookupSmallest and LookupNextLarger PUBLIC, since they may be
--useful in testing Delete.
Changed by MBrown on April 13, 1980 6:01 PM
-- Delete seemed to work the first time, a surprise. I removed some the redundant
--checks that were there for debugging; the old version will be called
--RedBlackCheckedImpl.
Changed by MBrown on May 1, 1980 10:03 PM
-- Changed Delete to avoid a search from the root to find the parent of the result node.
--This involved actually understanding the algorithm; "cleaning up" the way rebalancing
--is done introduced several bugs.
Changed by MBrown on May 7, 1980 6:36 PM
-- Added DestroyTable and Size. We now allocate/free TableObjects using procs in the
--OrderedSymbolTable interface. Renamed module.
Changed by MBrown on August 11, 1980 10:17 PM
-- Major revisions (prompted by first real client, the Juniper locks code, which will
--make further changes for its special needs). We take the key comparison function from
--the interface, instead of requiring "<" to be applicable. This means that comparisons
--may be expensive, so we save the result in a local variable to avoid doing twice the
--required number. MinusInfinity is no longer treated specially in the code. We do
--explicit field selections for the links and color fields, instead of using inline
--procs from the interface. Added Lookup3 proc for Juniper locks application. We now
--pass in ALL storage, including storage for table objects. y and z now point to nodes
--in the global frame of this module, rather than being replicated for each table; thus
--it is now unsafe for multiple processes to be inside of an instance of this module,
--even if they operate on distinct tables (it was always unsafe to perform concurrent
--operations on a single table).
Changed by MBrown on October 29, 1980 8:42 PM
-- Eliminated all reference to MinusInfinity (it was being used in CheckTable).
Changed by MBrown on January 7, 1981 9:47 PM
-- Changes for collectible storage, module monitor.
Changed by MBrown on March 1, 1982 3:30 pm
-- New version for use in Alpine. No allocation done in package, even for sentinels. No
--more "AssignKey[,]", which was used only for sentinels; instead, check for z explicitly. No
--more "Key[]", and Compare now takes a Key and a Node. A Table is just a Node; no count
--of number of nods in a table is kept in the table.
-- Other variants that are possible (but hopefully are not required):
-- (1) use < and = instead of 3-way compares.
-- (2) use object monitor (requires replicating z and y per-table), or no monitor.
-- (3) Table and Node are different types (this is required for object monitor, to hold y and z.)
Changed by MBrown on March 2, 1982 4:20 pm
-- Table is a RECORD [Node]. Clean up errors in interface.
Changed by MBrown on June 28, 1982 11:12 am
-- CEDAR implementation.
Changed by MBrown on August 26, 1982 11:11 pm
-- Use Environment.Comparison.
Changed by MBrown on November 10, 1982 6:34 pm
-- DuplicateKey is now an ERROR.