DIRECTORY Basics USING [Comparison], RedBlackTree; RedBlackTreeImpl: CEDAR MONITOR LOCKS self USING self: Table EXPORTS RedBlackTree SHARES RedBlackTree = BEGIN OPEN RedBlackTree; Comparison: TYPE = Basics.Comparison; RedBlack: TYPE = BOOL; Red: BOOL = TRUE; Black: BOOL = FALSE; DuplicateKey: PUBLIC ERROR = CODE; BadTable: PUBLIC ERROR = CODE; doChecking: BOOL = TRUE; Private: TYPE = REF PrivateRep; PrivateRep: TYPE = RECORD [ size: INT, getKey: GetKey, compare: Compare, y: Node, -- This is a dummy node considered to be less than the least real node z: Node]; -- This is a dummy node considered to be greater than the greatest real node InnerProc: TYPE = PROC [private: Private, y: Node, z: Node, root: Node]; DoInner: ENTRY PROC [self: Table, inner: InnerProc] = TRUSTED { ENABLE UNWIND => NULL; private: Private _ LOOPHOLE[self.private]; root: Node _ self.root; IF root = NIL OR root.rbLLink # NIL THEN RETURN WITH ERROR BadTable; inner[private, private.y, private.z, root]; }; Create: PUBLIC PROC [getKey: GetKey, compare: Compare] RETURNS [table: Table] = { private: Private _ NEW[PrivateRep _ [size: 0, getKey: getKey, compare: compare, y: NEW[NodeRep], z: NEW[NodeRep]]]; root: Node _ NEW[NodeRep]; y: Node _ private.y; z: Node _ private.z; table _ NEW[TableRep]; table.root _ root; table.private _ private; root.rbColor _ Black; root.rbLLink _ NIL; root.rbRLink _ z; y.rbColor _ Red; y.rbLLink _ y.rbRLink _ NIL; z.rbColor _ Black; z.rbLLink _ z.rbRLink _ y; }; Size: PUBLIC PROC [self: Table] RETURNS [size: INT _ 0] = { inner: InnerProc = { size _ private.size; }; IF self # NIL THEN DoInner[self, inner]; }; Insert: PUBLIC PROC [self: Table, dataToInsert: UserData, insertKey: Key] = { node: Node _ NEW[NodeRep _ [data: dataToInsert]]; InsertNode[self, node, insertKey]; }; InsertNode: PUBLIC PROC [self: Table, nodeToInsert: Node, insertKey: Key] = { keyAlreadyPresent: BOOL _ FALSE; inner: InnerProc = { x, gg, g, f: Node; c: Comparison _ greater; --since actual tree sits in right subtree of header node... f _ root; x _ f.rbRLink; DO IF (x.rbLLink.rbColor=Red) AND (x.rbRLink.rbColor=Red) THEN { IF x=z THEN { IF c = equal THEN {keyAlreadyPresent _ TRUE; EXIT}; x _ nodeToInsert; x.rbLLink _ z; x.rbRLink _ z; IF c = less THEN f.rbLLink _ x ELSE f.rbRLink _ x; c _ equal; private.size _ private.size + 1; }; x.rbLLink.rbColor _ Black; x.rbRLink.rbColor _ Black; x.rbColor _ Red; IF f.rbColor=Red THEN { g _ Balance[gg, g, f, x]; x _ g; }; }; IF c = equal THEN EXIT; gg _ g; g _ f; f _ x; x _ IF (c _ private.compare[insertKey, x.data]) = less THEN x.rbLLink ELSE x.rbRLink; ENDLOOP; root.rbRLink.rbColor _ Black; --root is always black }; DoInner[self, inner]; IF keyAlreadyPresent THEN ERROR DuplicateKey; }; Delete: PUBLIC PROC[self: Table, deleteKey: Key] RETURNS [deletedNode: Node _ NIL] = { inner: InnerProc = { f, result, parentOfResult: Node; x, g, b: Node; c: Comparison; result _ NIL; f _ root; x _ f.rbRLink; IF x = z THEN RETURN; y.rbColor _ Black; --children of external nodes have no red to contribute to rebalancing. IF (x.rbLLink.rbColor = Black) AND (x.rbRLink.rbColor = Black) THEN x.rbColor _ Red; IF (c _ private.compare[deleteKey, x.data]) = equal THEN { result _ x; parentOfResult _ f; }; DO { g _ f; f _ x; IF c = less THEN {b _ x.rbRLink; x _ x.rbLLink} ELSE {b _ x.rbLLink; x _ x.rbRLink}; IF x # z AND (c _ private.compare[deleteKey, x.data]) = equal THEN { result _ x; parentOfResult _ f; }; IF x.rbColor=Red OR x.rbLLink.rbColor=Red OR x.rbRLink.rbColor=Red THEN LOOP; IF b.rbColor=Red THEN { IF b = f.rbLLink THEN { f.rbLLink _ b.rbRLink; b.rbRLink _ f; } ELSE { f.rbRLink _ b.rbLLink; b.rbLLink _ f; }; f.rbColor _ Red; b.rbColor _ Black; IF f = g.rbLLink THEN g.rbLLink _ b ELSE g.rbRLink _ b; x _ b; GOTO FixCompare; };--IF b.rbColor=Red IF x=z THEN EXIT; x.rbColor _ Red; IF b.rbLLink.rbColor = Red THEN { b.rbLLink.rbColor _ Black; x _ Balance[g, f, b, b.rbLLink]; GOTO FixCompare; }; IF b.rbRLink.rbColor = Red THEN { b.rbRLink.rbColor _ Black; x _ Balance[g, f, b, b.rbRLink]; GOTO FixCompare; }; f.rbColor _ Black; b.rbColor _ Red; EXITS FixCompare => c _ private.compare[deleteKey, x.data]; }; ENDLOOP; root.rbRLink.rbColor _ Black; z.rbColor _ Black; y.rbColor _ Red;--undo the color mess IF result # NIL THEN { IF g.rbLLink = f THEN g.rbLLink _ z ELSE g.rbRLink _ z; IF f # result THEN { IF parentOfResult.rbLLink = result THEN parentOfResult.rbLLink _ f ELSE parentOfResult.rbRLink _ f; f.rbLLink _ result.rbLLink; f.rbRLink _ result.rbRLink; f.rbColor _ result.rbColor; }; private.size _ private.size - 1; }; deletedNode _ result; }; DoInner[self, inner]; }; LookupNode: PUBLIC PROC[self: Table, lookupKey: Key] RETURNS [node: Node _ NIL] = { inner: InnerProc = { node _ root.rbRLink; DO IF node = z THEN {node _ NIL; EXIT}; SELECT private.compare[lookupKey, node.data] FROM equal => EXIT; less => node _ node.rbLLink; greater => node _ node.rbRLink; ENDCASE => ERROR; ENDLOOP; }; DoInner[self, inner]; }; Lookup: PUBLIC PROC[self: Table, lookupKey: Key] RETURNS [data: UserData _ NIL] = { inner: InnerProc = { equalNode: Node _ root.rbRLink; DO IF equalNode = z THEN RETURN; SELECT private.compare[lookupKey, equalNode.data] FROM equal => EXIT; less => equalNode _ equalNode.rbLLink; greater => equalNode _ equalNode.rbRLink; ENDCASE => ERROR; ENDLOOP; data _ equalNode.data; }; DoInner[self, inner]; }; Lookup3: PUBLIC PROC[self: Table, lookupKey: Key] RETURNS[leftData, equalData, rightData: UserData _ NIL] = { inner: InnerProc = { equalNode: Node _ root.rbRLink; leftNode: Node _ NIL; rightNode: Node _ NIL; DO IF equalNode = z THEN {equalNode _ NIL; EXIT}; SELECT private.compare[lookupKey, equalNode.data] FROM equal => { IF equalNode.rbLLink # z THEN { leftNode _ equalNode.rbLLink; WHILE leftNode.rbRLink # z DO leftNode _ leftNode.rbRLink ENDLOOP; }; IF equalNode.rbRLink # z THEN { rightNode _ equalNode.rbRLink; WHILE rightNode.rbLLink # z DO rightNode _ rightNode.rbLLink ENDLOOP; }; EXIT; }; less => {rightNode _ equalNode; equalNode _ equalNode.rbLLink}; greater => {leftNode _ equalNode; equalNode _ equalNode.rbRLink;}; ENDCASE => ERROR; ENDLOOP; IF equalNode # NIL THEN equalData _ equalNode.data; IF leftNode # NIL THEN leftData _ leftNode.data; IF rightNode # NIL THEN rightData _ rightNode.data; }; DoInner[self, inner]; }; LookupSmallest: PUBLIC PROC [self: Table] RETURNS [data: UserData _ NIL] = { inner: InnerProc = { smallestNode: Node _ root.rbRLink; IF smallestNode=z THEN RETURN; UNTIL smallestNode.rbLLink=z DO smallestNode _ smallestNode.rbLLink ENDLOOP; data _ smallestNode.data; }; DoInner[self, inner]; }; LookupNextLarger: PUBLIC PROC [self: Table, lookupKey: Key] RETURNS [data: UserData _ NIL] = { inner: InnerProc = { largerNode: Node _ NIL; x: Node _ root.rbRLink; UNTIL x=z DO IF private.compare[lookupKey, x.data] = less THEN {largerNode _ x; x _ x.rbLLink} ELSE x _ x.rbRLink; ENDLOOP; IF largerNode # NIL THEN data _ largerNode.data; }; DoInner[self, inner]; }; LookupLargest: PUBLIC PROC[self: Table] RETURNS [data: UserData _ NIL] = { inner: InnerProc = { largestNode: Node _ root.rbRLink; IF largestNode=z THEN {largestNode _ NIL; RETURN}; UNTIL largestNode.rbRLink=z DO largestNode _ largestNode.rbRLink ENDLOOP; data _ largestNode.data; }; DoInner[self, inner]; }; LookupNextSmaller: PUBLIC PROC [self: Table, lookupKey: Key] RETURNS [data: UserData _ NIL] = { inner: InnerProc = { smallerNode: Node _ NIL; x: Node _ root.rbRLink; UNTIL x=z DO IF private.compare[lookupKey, x.data] = greater THEN { smallerNode _ x; x _ x.rbRLink } ELSE x _ x.rbLLink; ENDLOOP; IF smallerNode # NIL THEN data _ smallerNode.data; }; DoInner[self, inner]; }; DestroyTable: PUBLIC PROC [self: Table] = { inner: InnerProc = { new: Table _ Create[private.getKey, private.compare]; self.private _ new.private; self.root _ new.root; }; DoInner[self, inner]; }; EnumerateIncreasing: PUBLIC PROC [self: Table, procToApply: EachNode] = { inner: InnerProc = { VisitSubtree: PROC [node: Node] RETURNS [stop: BOOL _ FALSE] = { IF node.rbLLink # z AND VisitSubtree[node.rbLLink] THEN RETURN [TRUE]; IF procToApply[node.data] THEN RETURN [TRUE]; IF node.rbRLink # z THEN RETURN [VisitSubtree[node.rbRLink]]; }; IF root = NIL THEN RETURN; IF root.rbRLink = z THEN RETURN; [] _ VisitSubtree[root.rbRLink]; }; DoInner[self, inner]; }; EnumerateDecreasing: PUBLIC PROC [self: Table, procToApply: EachNode] = { inner: InnerProc = { VisitSubtree: PROC [node: Node] RETURNS [stop: BOOL _ FALSE] = { IF node.rbRLink # y AND VisitSubtree[node.rbRLink] THEN RETURN [TRUE]; IF procToApply[node.data] THEN RETURN [TRUE]; IF node.rbLLink # y THEN RETURN [VisitSubtree[node.rbLLink]]; }; IF root = NIL THEN RETURN; IF root.rbRLink = y THEN RETURN; [] _ VisitSubtree[root.rbRLink]; }; DoInner[self, inner]; }; CheckTable: PUBLIC PROC [self: Table] = { inner: InnerProc = { Assert: PROC [p: BOOL] = {IF NOT p THEN ERROR BadTable }; Check1: PROC [x: Node, maxKey: Key] RETURNS [Key, INTEGER, BOOL] = { dl, dr: INTEGER; redChild: BOOL; IF x = z THEN RETURN[maxKey, 0, FALSE]; [maxKey, dl, redChild] _ Check1[x.rbLLink, maxKey]; Assert[~(redChild AND (x.rbColor=Red))]; Assert[private.compare[maxKey,x.data] = (IF count=0 THEN equal ELSE less)]; count _ count + 1; [maxKey, dr, redChild] _ Check1[x.rbRLink, private.getKey[x.data]]; Assert[~(redChild AND (x.rbColor=Red))]; Assert[dl=dr]; RETURN[maxKey, dl+(IF x.rbColor=Black THEN 1 ELSE 0), x.rbColor=Red]; };--Check1 count: LONG INTEGER _ 0; Assert[z.rbLLink=y]; Assert[z.rbRLink=y]; IF root.rbRLink # z THEN { smallest: Node _ root.rbRLink; UNTIL smallest.rbLLink=z DO smallest _ smallest.rbLLink ENDLOOP; [] _ Check1[root.rbRLink, private.getKey[smallest.data]]; }; }; DoInner[self, inner]; }; Balance: PROC [gg, g, f, x: Node] RETURNS [Node] = { t: Node; tc: RedBlack; IF f=g.rbLLink THEN { IF x=f.rbRLink THEN { f.rbRLink _ x.rbLLink; x.rbLLink _ f; t _ f; f _ x; x _ t; }; } ELSE { IF x=f.rbLLink THEN { f.rbLLink _ x.rbRLink; x.rbRLink _ f; t _ f; f _ x; x _ t; }; }; IF x=f.rbLLink THEN { g.rbLLink _ f.rbRLink; f.rbRLink _ g; } ELSE { g.rbRLink _ f.rbLLink; f.rbLLink _ g; }; IF g = gg.rbRLink THEN gg.rbRLink _ f ELSE gg.rbLLink _ f; tc _ g.rbColor; g.rbColor _ f.rbColor; f.rbColor _ tc; RETURN[f] }; END. CHANGE LOG Created by MBrown on January 18, 1980 10:58 AM Changed by MBrown on January 19, 1980 8:50 PM Changed by MBrown on 20-Jan-80 13:28 Changed by MBrown on March 4, 1980 2:17 PM Changed by MBrown on April 13, 1980 4:21 PM Changed by MBrown on April 13, 1980 6:01 PM Changed by MBrown on May 1, 1980 10:03 PM Changed by MBrown on May 7, 1980 6:36 PM Changed by MBrown on August 11, 1980 10:17 PM Changed by MBrown on October 29, 1980 8:42 PM Changed by MBrown on January 7, 1981 9:47 PM Changed by MBrown on March 1, 1982 3:30 pm Changed by MBrown on March 2, 1982 4:20 pm Changed by MBrown on June 28, 1982 11:12 am Changed by MBrown on August 26, 1982 11:11 pm Changed by MBrown on November 10, 1982 6:34 pm Changed by MBrown on October 20, 1983 11:38 pm àRedBlackTreeImpl.mesa Copyright c 1985 by Xerox Corporation. All rights reserved. MBrown on October 20, 1983 11:37 pm Russ Atkinson (RRA) May 7, 1985 12:11:16 pm PDT Carl Hauser, April 11, 1985 4:45:25 pm PST Notes on the data structure The color of a node should be thought of as the color of the edge coming into the node. The general color invariants for red-black trees are that (1) all external nodes are black, while internals are red or black, and (2) all paths from an internal node to an external node contain the same number of black arcs. The tree edges of a 2-3-4 tree become black arcs in the binarized red-black representation, while the internal 2-, 3-, and 4-nodes are represented by connected red subtrees: the degenerate one with one node, both of the possible two node trees, and the completely balanced tree on three nodes. Another way of stating this invariant is to say that no path contains two consecutive red arcs. The top-down insertion algorithm below is from Program 5 of "A Dichromatic Framework for Balanced Trees" by Leo Guibas and Bob Sedgewick. The deletion algorithm incorporates several changes to the algorithm in that paper. This program was originally adapted from Dave Gifford's StringStorageImpl.Mesa. forces extra validity checking of inputs. Locking mechanism Procedures exported to RedBlackTree Requires self.rbColor = self.rbRLink.rbColor = z.rbColor = Black, y.rbColor = Red. search the tree, rebalancing on the way down. an external node has been found; check to be sure that a duplicate key is not present. perform the insertion. do color flip. two reds in a row, so rebalance (gg may be self). Inject a red node at the root if necessary. Search the tree for the symmetric order succecessor of the node containing key (or the node itself if its RLink is z) If x is the node we're to return, save pointers to it and its parent for later. Note that if x is Red, no rotations happen now; this is what re-establishes the properties of g and f eventually... Single rotation to move the red link b onto the search path. Move back up the path to allow g and f to get re-established It is essential that this exit test be right here - after the single rotation, but before the double rotations. "Color flip" If search has been successful, f is now the symmetric order successor of result (or result itself if result.rbRLink = z), and g is its parent. f is Red, unless it is also the root. x and b are irrelevant. detach f from the tree. if f is not the result node, splice f into tree in place of result. quick and dirty recursive implementation, has overhead of one procedure call per call to procToApply. never called with root = z; this saves half of the procedure calls. quick and dirty recursive implementation, has overhead of one procedure call per call to procToApply. never called with root = y; this saves half of the procedure calls. Raises BadTable if the table t is not well-formed. Returns largest key in subtree rooted at x, number of black arcs on paths to external nodes from x, and whether or not this node is red. ERRORs if it finds two reds in a row, finds different distances to the external nodes (counting only red arcs), or finds keys out of order. If x is an internal node, increments count. check structure of the table header, sentinels, etc. check structure of table. Private procedure, used by Insert (1 call), Delete (2 calls). Balances a local portion of the tree. g is a child of gg (it may be either child). if g -rlink-> f -rlink-> x, or x <-llink- f <-llink- g, then only a single rotation is needed; otherwise two rotations are used. when called from insert, g is black and both f and x are red. after rebalancing, the new g is black, and is the parent of both the new f and the new x, which are both red. do first rotation if necessary do second rotation update link in great grandparent do color swap (when called from Insert, g is always Black and f is always Red) return g, new child of gg (name changed in second rotation) It worked the first time! Implemented EnumerateIncreasing. Fixed a bug in CheckTable that defeated checking of key order. Made changes to allow MinusInfinity as a Key. Changed Insert to use keyPresent flag, as in Guibas-Sedgewick Program 5. Changed Balance to compare pointers rather than keys, and eliminated the code for "swap g and f" since only g is returned from Balance. Moved inline definitions of Node structure to RedBlackTree, which we now OPEN. Coded Delete. Made LookupSmallest and LookupNextLarger PUBLIC, since they may be useful in testing Delete. Delete seemed to work the first time, a surprise. I removed some the redundant checks that were there for debugging; the old version will be called RedBlackCheckedImpl. Changed Delete to avoid a search from the root to find the parent of the result node. This involved actually understanding the algorithm; "cleaning up" the way rebalancing is done introduced several bugs. Added DestroyTable and Size. We now allocate/free TableObjects using procs in the RedBlackTree interface. Renamed module. Major revisions (prompted by first real client, the Juniper locks code, which will make further changes for its special needs). We take the key comparison function from the interface, instead of requiring "<" to be applicable. This means that comparisons may be expensive, so we save the result in a local variable to avoid doing twice the required number. MinusInfinity is no longer treated specially in the code. We do explicit field selections for the links and color fields, instead of using inline procs from the interface. Added Lookup3 proc for Juniper locks application. We now pass in ALL storage, including storage for table objects. y and z now point to nodes in the global frame of this module, rather than being replicated for each table; thus it is now unsafe for multiple processes to be inside of an instance of this module, even if they operate on distinct tables (it was always unsafe to perform concurrent operations on a single table). Eliminated all reference to MinusInfinity (it was being used in CheckTable). Changes for collectible storage, module monitor. New version for use in Alpine. No allocation done in package, even for sentinels. No more "AssignKey[,]", which was used only for sentinels; instead, check for z explicitly. No more "Key[]", and Compare now takes a Key and a Node. A Table is just a Node; no count of number of nods in a table is kept in the table. Other variants that are possible (but hopefully are not required): (1) use < and = instead of 3-way compares. (2) use object monitor (requires replicating z and y per-table), or no monitor. (3) Table and Node are different types (this is required for object monitor, to hold y and z.) Table is a RECORD [Node]. Clean up errors in interface. CEDAR implementation. Use Environment.Comparison. DuplicateKey is now an ERROR. Conversion to Cedar 5.0. Russ Atkinson (RRA) May 7, 1985 12:09:51 pm PDT Completely revised to make this a monitorized data abstraction. The user's data is now REFs, and the user supplies the comparison routine(s). ÊŒ˜codešœ™Kšœ Ïmœ1™