RopeImplExt.mesa, "Thick" string implementation extension
Copyright © 1984, 1985 by Xerox Corporation. All rights reserved.
Russ Atkinson (RRA) October 17, 1985 6:51:31 pm PDT
DIRECTORY
Rope,
RopePrivate;
RopeImplExt: CEDAR PROGRAM
IMPORTS Rope, RopePrivate
EXPORTS Rope
SHARES Rope
= BEGIN OPEN Rope, RopePrivate;
Run: PUBLIC PROC [s1: ROPE, pos1: INT, s2: ROPE, pos2: INT, case: BOOLTRUE] RETURNS [result: INT ← 0] = TRUSTED {
Returns the largest number of characters N such that s1 starting at pos1 is equal to s2 starting at pos2 for N characters. More formally: FOR i IN [0..N): s1[pos1+i] = s2[pos2+i]. If case is true, then case matters, otherwise upper and lower case characters are considered equal.
len1: INT;
str1: Text;
[len1, str1] ← SingleSize[s1];
IF NonNeg[pos1] < len1 THEN {
len2: INT;
str2: Text;
[len2, str2] ← SingleSize[s2];
IF NonNeg[pos2] < len2 THEN {
r1,r2: ROPENIL;
st1,sz1,lm1: INT ← 0;
st2,sz2,lm2: INT ← 0;
DO
IF st1 = lm1 THEN {
need a new piece from s1
[r1, st1, sz1] ← ContainingPiece[s1, pos1 ← pos1 + sz1];
IF sz1 = 0 THEN RETURN;
lm1 ← st1 + sz1};
IF st2 = lm2 THEN {
need a new piece from s2
[r2, st2, sz2] ← ContainingPiece[s2, pos2 ← pos2 + sz2];
IF sz2 = 0 THEN RETURN;
lm2 ← st2 + sz2};
{
c1: CHAR ← InlineFetch[r1, st1];
c2: CHAR ← InlineFetch[r2, st2];
IF NOT case THEN {
IF c1 <= 'Z AND c1 >= 'A THEN c1 ← c1 + ('a-'A);
IF c2 <= 'Z AND c2 >= 'A THEN c2 ← c2 + ('a-'A);
};
IF c1 # c2 THEN RETURN;
};
result ← result + 1;
st1 ← st1 + 1; st2 ← st2 + 1;
ENDLOOP;
}}};
Find: PUBLIC PROC [s1, s2: ROPE, pos1: INT ← 0, case: BOOLTRUE] RETURNS [INT] = TRUSTED {
index: INT ← Index[s1, pos1, s2, case];
IF index = InlineSize[s1] THEN RETURN [-1];
RETURN [index];
};
Index: PUBLIC PROC [s1: ROPE, pos1: INT, s2: ROPE, case: BOOLTRUE] RETURNS [INT] = TRUSTED {
Returns the smallest character position N such that s2 occurs in s1 at N and N >= pos1. If s2 does not occur in s1 at or after pos1, s1.length is returned. pos1 <= N < s1.length => FOR i IN [0..s2.length): s1[N+i] = s2[i]; N = s1.length => s2 does not occur in s1. If case is true, then case matters, otherwise upper and lower case characters are considered equal.
len1,len2, rem: INT; both: BOOL;
[len1,len2,both] ← DoubleSize[s1, s2];
rem ← IF pos1 >= len1 THEN 0 ELSE len1 - NonNeg[pos1];
IF rem >= len2 THEN {
c: CHAR ← InlineFetch[s2, 0];
IF len2 = 0 THEN RETURN [pos1];
rem ← rem - len2 + 1;
IF case
THEN {
Cmp: PROC [cc: CHAR] RETURNS [BOOL] = TRUSTED {
IF c = cc AND Run[s1, pos1+1, s2, 1, case]+1 = len2 THEN RETURN [TRUE];
pos1 ← pos1 + 1; RETURN [FALSE];
};
IF Map[s1, pos1, rem, Cmp] THEN RETURN [pos1]
}
ELSE {
LCmp: PROC [cc: CHAR] RETURNS [BOOL] = TRUSTED {
IF cc <= 'Z AND cc >= 'A THEN cc ← cc + ('a-'A);
IF c = cc AND Run[s1, pos1+1, s2, 1, case]+1 = len2 THEN RETURN [TRUE];
pos1 ← pos1 + 1; RETURN [FALSE];
};
IF c <= 'Z AND c >= 'A THEN c ← c + ('a-'A);
IF Map[s1, pos1, rem, LCmp] THEN RETURN [pos1];
};
};
RETURN [len1];
};
Match: PUBLIC PROC [pattern, object: ROPE, case: BOOLTRUE] RETURNS [BOOL] = TRUSTED {
Returns TRUE if the object matches the pattern, where the pattern may contain * to indicate that 0 or more characters will match. Returns FALSE otherwise. For example, using a*b as a pattern, some matching objects are: ab, a#b, a###b, and some not matching objects: abc, cde, bb, a, Ab. If case is true, then case matters, otherwise upper and lower case characters are considered equal.
submatch: PROC [i1: INT, len1: INT, i2: INT, len2: INT] RETURNS [BOOL] = TRUSTED {
WHILE len1 > 0 DO
c1: CHAR ← InlineFetch[pattern, i1];
IF c1 = '* THEN {
quick kill for * at end of pattern
IF len1 = 1 THEN RETURN [TRUE];
else must take all combinations
{-- first, accept the *
j1: INT ← i1 + 1;
nlen1: INT ← len1 - 1;
j2: INT ← i2;
nlen2: INT ← len2;
WHILE nlen2 >= 0 DO
IF submatch[j1, nlen1, j2, nlen2] THEN RETURN [TRUE];
j2 ← j2 + 1;
nlen2 ← nlen2 - 1;
ENDLOOP;
};
RETURN [FALSE];
};
IF len2 <= 0 THEN RETURN [FALSE];
at this point demand an exact match in both strings
{c2: CHAR ← InlineFetch[object, i2];
IF NOT case THEN {
IF c1 <= 'Z AND c1 >= 'A THEN c1 ← c1 + ('a-'A);
IF c2 <= 'Z AND c2 >= 'A THEN c2 ← c2 + ('a-'A);
};
IF c1 # c2 THEN RETURN [FALSE];
};
i1 ← i1 + 1;
len1 ← len1 - 1;
i2 ← i2 + 1;
len2 ← len2 - 1;
ENDLOOP;
RETURN [len2 = 0];
};
len1: INT ← InlineSize[pattern];
len2: INT ← InlineSize[object];
First, strip off the common tails until they differ (quick kill false), or the pattern has a * at the tail. This strip is easy, because we just decrement the lengths.
WHILE len1 > 0 DO
n: INT ← len1 - 1;
c1: CHAR ← InlineFetch[pattern, n];
c2: CHAR;
IF c1 = '* THEN EXIT;
IF len2 = 0 THEN RETURN [FALSE];
len1 ← n;
len2 ← len2 - 1;
c2 ← InlineFetch[object, len2];
IF NOT case THEN {
IF c1 <= 'Z AND c1 >= 'A THEN c1 ← c1 + ('a-'A);
IF c2 <= 'Z AND c2 >= 'A THEN c2 ← c2 + ('a-'A);
};
IF c1 # c2 THEN RETURN [FALSE];
ENDLOOP;
RETURN [submatch [0, len1, 0, len2]];
};
SkipTo: PUBLIC PROC [s: ROPE, pos: INT, skip: ROPE] RETURNS [INT] = TRUSTED {
... returns the lowest position N in s such that s[N] is in the skip string and N >= pos. If no such character occurs in s, then return s.length.
len: INT ← InlineSize[s];
skipText: Rope.Text = InlineFlatten[skip];
skiplen: NATIF skipText = NIL THEN 0 ELSE skipText.length;
IF pos < len AND skiplen # 0 THEN {
CharMatch: PROC [c: CHAR] RETURNS [BOOL] = TRUSTED {
FOR i: NAT IN [0..skiplen) DO
IF c = skipText[i] THEN RETURN [TRUE];
ENDLOOP;
pos ← pos + 1;
RETURN [FALSE];
};
IF Map[s, pos, len - pos, CharMatch] THEN RETURN [pos];
};
RETURN [len];
};
SkipOver: PUBLIC PROC [s: ROPE, pos: INT, skip: ROPE] RETURNS [INT] = TRUSTED {
... return the lowest position N in s such that s[N] is NOT in the skip string and N >= pos. If no such character occurs in s, then return s.length.
len: INT ← InlineSize[s];
skipText: Rope.Text = InlineFlatten[skip];
skiplen: NATIF skipText = NIL THEN 0 ELSE skipText.length;
IF pos >= len THEN RETURN [len];
IF skiplen # 0 THEN {
CharMatch: PROC [c: CHAR] RETURNS [BOOL] = TRUSTED {
FOR i: NAT IN [0..skiplen) DO
IF c = skipText[i] THEN GO TO found;
ENDLOOP;
RETURN [TRUE];
EXITS found => {pos ← pos + 1; RETURN [FALSE]};
};
IF Map[s, pos, len - pos, CharMatch] THEN RETURN [pos];
};
RETURN [pos];
};
VerifyStructure: PUBLIC PROC [s: ROPE] RETURNS [leaves,nodes,maxDepth: INT ← 0] = TRUSTED {
... traverses the structure of the given rope object; extra checking is performed to verify invariants.
IF s = NIL THEN RETURN;
WITH x: s SELECT FROM
text => {
leaves ← 1;
IF x.length > x.max THEN ERROR VerifyFailed};
node =>
WITH x: x SELECT FROM
substr => {
ref: ROPE ← x.base;
len1: INT ← Size[x.base];
IF x.start < 0 OR x.size <= 0 THEN ERROR VerifyFailed;
IF len1 < x.start + x.size THEN ERROR VerifyFailed;
[leaves, nodes, maxDepth] ← VerifyStructure[ref];
nodes ← nodes + 1};
concat => {
leaves1,nodes1,maxDepth1: INT;
left: ROPE ← x.base;
lSize: INT ← Size[left];
right: ROPE ← x.rest;
rSize: INT ← Size[right];
[leaves1, nodes1, maxDepth1] ← VerifyStructure[left];
[leaves, nodes, maxDepth] ← VerifyStructure[right];
leaves ← leaves + leaves1;
nodes ← nodes + nodes1 + 1;
IF maxDepth1 > maxDepth THEN maxDepth ← maxDepth1;
IF x.size # lSize + rSize THEN ERROR VerifyFailed;
IF x.pos # lSize THEN ERROR VerifyFailed;
};
replace => {
leaves1,nodes1,maxDepth1: INT;
old: ROPE ← x.base;
oldSize: INT ← Size[old];
repl: ROPE ← x.replace;
replSize: INT ← Size[repl];
[leaves, nodes, maxDepth] ← VerifyStructure[old];
[leaves1, nodes1, maxDepth1] ← VerifyStructure[repl];
leaves ← leaves + leaves1;
nodes ← nodes + nodes1 + 1;
IF maxDepth < maxDepth1 THEN maxDepth ← maxDepth1;
IF x.start > x.oldPos OR x.start > x.newPos THEN ERROR VerifyFailed;
IF x.newPos - x.start # replSize THEN ERROR VerifyFailed;
IF x.start < 0 OR x.start > x.size THEN ERROR VerifyFailed;
IF x.oldPos > oldSize THEN ERROR VerifyFailed;
};
object => {
leaves ← 1;
IF x.size < 0 OR x.fetch = NIL THEN ERROR VerifyFailed;
};
ENDCASE => ERROR NoRope;
ENDCASE => ERROR NoRope;
maxDepth ← maxDepth + 1;
IF maxDepth # InlineDepth[s] THEN ERROR VerifyFailed;
};
VerifyFailed: PUBLIC ERROR = CODE;
***** New Balance implementation
This implementation is courtesy of Michael Plass (March 1985).
Stopper: TYPE = PROC [part: Part] RETURNS [BOOL];
Part: TYPE ~ RECORD [base: ROPE, start: INT, end: INT];
Must have 0 <= start <= end <= Size[base];
HeightArray: TYPE = ARRAY [0..RopePrivate.MaxDepth) OF INT;
minSizeForHeight: REF HeightArray ← NIL;
InitMinSizeForHeight: PROC ~ {
Initializes the height array. Does not need monitoring, since the array is completely initialized by the time the assignment occurs, and REF assignment is atomic. Races can only cause a little extra allocation.
IF minSizeForHeight = NIL THEN {
h: REF HeightArray ← NEW[HeightArray];
h[0] ← 0;
A NIL rope has no characters and height 0.
h[1] ← 1;
A flat rope ought to have at least one character.
h[2] ← Rope.FlatMax+1;
Must be at least this big to warrant any non-flat structure.
FOR i: NAT IN [3..RopePrivate.MaxDepth) DO
Use Fibonacci recurrence to compute rest.
Be careful about overflow here...
IF INT.LAST - h[i-1] < h[i-2]
THEN h[i] ← INT.LAST
ELSE h[i] ← h[i-1] + h[i-2];
ENDLOOP;
minSizeForHeight ← h;
};
};
PartIsBalanced: Stopper ~ {
Examines only the root.
size: INT ~ Rope.InlineSize[part.base];
height: INT ~ RopePrivate.InlineDepth[part.base];
IF part.start # 0
OR part.end # size
OR height >= RopePrivate.MaxDepth
THEN RETURN [FALSE];
IF minSizeForHeight = NIL THEN InitMinSizeForHeight[];
IF size < minSizeForHeight[height] THEN RETURN [FALSE];
WITH part.base SELECT FROM
substr: REF Rope.RopeRep.node.substr => RETURN [height<=1];
concat: REF Rope.RopeRep.node.concat => RETURN [TRUE];
replace: REF Rope.RopeRep.node.replace => RETURN [FALSE];
ENDCASE => RETURN [TRUE];
};
Balance: PUBLIC PROC [base: ROPE, start: INT ← 0, len: INT ← MaxLen, flat: INT ← FlatMax] RETURNS [ROPE] = {
This procedure is here mostly to match the new implementation against the old definition.
RETURN [Rope.Substr[NewBalance[base], start, len]];
};
NewBalance: PROC [rope: ROPE] RETURNS [ROPE] ~ {
ARep: TYPE ~ RECORD[index: INT𡤀, sub: ARRAY [0..d) OF AElement, rest: REF ARep←NIL];
AElement: TYPE ~ RECORD [base: ROPE, size: INT];
a: ARep; -- An extensible array that is very cheap if it is small.
accel: REF ARep ← NIL;
aN: INT ← 0;
d: NAT ~ 40;
StoreA: PROC [i: INT, e: AElement] ~ {
IF i-a.index < d
THEN a.sub[i-a.index] ← e
ELSE {
IF a.rest = NIL THEN {a.rest ← accel ← NEW[ARep]; accel.index ← d};
IF i < accel.index THEN accel ← a.rest;
WHILE i-accel.index >= d DO
IF accel.rest = NIL THEN {
accel.rest ← NEW[ARep];
accel.rest.index�l.index+d};
accel ← accel.rest;
ENDLOOP;
accel.sub[i-accel.index] ← e;
};
};
ASub: PROC [i: INT] RETURNS [e: AElement] ~ {
IF i-a.index < d
THEN e ← a.sub[i-a.index]
ELSE {
IF i < accel.index THEN accel ← a.rest;
WHILE i-accel.index >= d DO accel ← accel.rest ENDLOOP;
e ← accel.sub[i-accel.index];
};
};
SavePart: PROC [part: Part] ~ {
IF part.end > part.start THEN {
rope: ROPE ← Rope.Substr[part.base, part.start, part.end-part.start];
StoreA[aN, [rope, Rope.InlineSize[rope]]];
aN ← aN + 1;
};
};
BalanceRange: PROC [first: INT, end: INT, size: INT] RETURNS [ROPE] ~ {
Balances pieces [first..end), whose sizes must sum to size.
SELECT TRUE FROM
first = end => RETURN[NIL];
end-first = 1 => RETURN[ASub[first].base];
ENDCASE => {
i: INT ← first+1;
sizetoi: INT ← ASub[first].size;
FOR sizei: INT ← ASub[i].size, ASub[i].size
WHILE i < end-1 AND ((sizetoi+sizei)*2 < size OR ABS[sizetoi*2-size] > ABS[(sizetoi+sizei)*2-size]) DO
sizetoi ← sizetoi + sizei;
i ← i + 1;
ENDLOOP;
RETURN[Rope.Concat[BalanceRange[first, i, sizetoi], BalanceRange[i, end, size-sizetoi]]];
}
};
part: Part ~ [rope, 0, Rope.Size[rope]];
MapParts[part, SavePart, PartIsBalanced];
RETURN [BalanceRange[0, aN, part.end-part.start]]
};
BadPart: SIGNAL ~ CODE;
MapParts: PROC[part: Part, action: PROC[Part], stopDescent: Stopper←NIL] ~{
IF stopDescent#NIL AND stopDescent[part]
THEN action[part]
ELSE {
size: INT ~ Rope.InlineSize[part.base];
IF part.start < 0 OR part.end NOT IN [part.start..part.start+size] THEN ERROR BadPart;
WITH part.base SELECT FROM
substr: REF Rope.RopeRep.node.substr => {
MapParts[[substr.base, substr.start+part.start, substr.start+part.end], action, stopDescent];
};
concat: REF Rope.RopeRep.node.concat => {
IF part.start < concat.pos THEN {
MapParts[[concat.base, part.start, MIN[part.end, concat.pos]], action, stopDescent];
};
IF part.end > concat.pos THEN {
newStart: INTMAX[part.start-concat.pos, 0];
newEnd: INT ← part.end-concat.pos;
MapParts[[concat.rest, newStart, newEnd], action, stopDescent];
};
};
replace: REF Rope.RopeRep.node.replace => {
len1: INT ~ replace.start;
len2: INT ~ replace.newPos-replace.start;
len3: INT ~ replace.size-replace.newPos;
offset3: INT ~ replace.oldPos;
IF part.start < len1 THEN {
MapParts[[replace.base, part.start, MIN[part.end, len1]], action, stopDescent];
};
IF part.start < len1+len2 AND part.end > len1 THEN {
newStart: INT ~ MAX[part.start-len1, 0];
newEnd: INT ~ MIN[part.end-len1, len2];
MapParts[[replace.replace, newStart, newEnd], action, stopDescent];
};
IF part.end > len1+len2 THEN {
newStart: INTMAX[part.start-(len1+len2), 0]+offset3;
newEnd: INTMIN[part.end-(len1+len2), len3]+offset3;
MapParts[[replace.base, newStart, newEnd], action, stopDescent];
};
};
ENDCASE => action[part];
};
};
END.