[Cyan]<Cedar6.0>Rope>RopeImplExt.mesa!6

RopeImplExt.mesa, "Thick" string implementation extension

Russ Atkinson (RRA) October 17, 1985 6:51:31 pm PDT

DIRECTORY

Rope,

RopePrivate;

RopeImplExt: CEDAR PROGRAM

IMPORTS Rope, RopePrivate

EXPORTS Rope

SHARES Rope

= BEGIN OPEN Rope, RopePrivate;

Run: PUBLIC PROC [s1: ROPE, pos1: INT, s2: ROPE, pos2: INT, case: BOOL ← TRUE] RETURNS [result: INT ← 0] = TRUSTED {

Returns the largest number of characters N such that s1 starting at pos1 is equal to s2 starting at pos2 for N characters. More formally: FOR i IN [0..N): s1[pos1+i] = s2[pos2+i]. If case is true, then case matters, otherwise upper and lower case characters are considered equal.

len1: INT;

str1: Text;

[len1, str1] ← SingleSize[s1];

IF NonNeg[pos1] < len1 THEN {

len2: INT;

str2: Text;

[len2, str2] ← SingleSize[s2];

IF NonNeg[pos2] < len2 THEN {

r1,r2: ROPE ← NIL;

st1,sz1,lm1: INT ← 0;

st2,sz2,lm2: INT ← 0;

IF st1 = lm1 THEN {

need a new piece from s1

[r1, st1, sz1] ← ContainingPiece[s1, pos1 ← pos1 + sz1];

IF sz1 = 0 THEN RETURN;

lm1 ← st1 + sz1};

IF st2 = lm2 THEN {

need a new piece from s2

[r2, st2, sz2] ← ContainingPiece[s2, pos2 ← pos2 + sz2];

IF sz2 = 0 THEN RETURN;

lm2 ← st2 + sz2};

{

c1: CHAR ← InlineFetch[r1, st1];

c2: CHAR ← InlineFetch[r2, st2];

IF NOT case THEN {

IF c1 <= 'Z AND c1 >= 'A THEN c1 ← c1 + ('a-'A);

IF c2 <= 'Z AND c2 >= 'A THEN c2 ← c2 + ('a-'A);

};

IF c1 # c2 THEN RETURN;

};

result ← result + 1;

st1 ← st1 + 1; st2 ← st2 + 1;

ENDLOOP;

}}};

Find: PUBLIC PROC [s1, s2: ROPE, pos1: INT ← 0, case: BOOL ← TRUE] RETURNS [INT] = TRUSTED {

index: INT ← Index[s1, pos1, s2, case];

IF index = InlineSize[s1] THEN RETURN [-1];

RETURN [index];

};

Index: PUBLIC PROC [s1: ROPE, pos1: INT, s2: ROPE, case: BOOL ← TRUE] RETURNS [INT] = TRUSTED {

Returns the smallest character position N such that s2 occurs in s1 at N and N >= pos1. If s2 does not occur in s1 at or after pos1, s1.length is returned. pos1 <= N < s1.length => FOR i IN [0..s2.length): s1[N+i] = s2[i]; N = s1.length => s2 does not occur in s1. If case is true, then case matters, otherwise upper and lower case characters are considered equal.

len1,len2, rem: INT; both: BOOL;

[len1,len2,both] ← DoubleSize[s1, s2];

rem ← IF pos1 >= len1 THEN 0 ELSE len1 - NonNeg[pos1];

IF rem >= len2 THEN {

c: CHAR ← InlineFetch[s2, 0];

IF len2 = 0 THEN RETURN [pos1];

rem ← rem - len2 + 1;

IF case

THEN {

Cmp: PROC [cc: CHAR] RETURNS [BOOL] = TRUSTED {

IF c = cc AND Run[s1, pos1+1, s2, 1, case]+1 = len2 THEN RETURN [TRUE];

pos1 ← pos1 + 1; RETURN [FALSE];

};

IF Map[s1, pos1, rem, Cmp] THEN RETURN [pos1]

}

ELSE {

LCmp: PROC [cc: CHAR] RETURNS [BOOL] = TRUSTED {

IF cc <= 'Z AND cc >= 'A THEN cc ← cc + ('a-'A);

IF c = cc AND Run[s1, pos1+1, s2, 1, case]+1 = len2 THEN RETURN [TRUE];

pos1 ← pos1 + 1; RETURN [FALSE];

};

IF c <= 'Z AND c >= 'A THEN c ← c + ('a-'A);

IF Map[s1, pos1, rem, LCmp] THEN RETURN [pos1];

};

RETURN [len1];

};

Match: PUBLIC PROC [pattern, object: ROPE, case: BOOL ← TRUE] RETURNS [BOOL] = TRUSTED {

Returns TRUE if the object matches the pattern, where the pattern may contain * to indicate that 0 or more characters will match. Returns FALSE otherwise. For example, using a*b as a pattern, some matching objects are: ab, a#b, a###b, and some not matching objects: abc, cde, bb, a, Ab. If case is true, then case matters, otherwise upper and lower case characters are considered equal.

submatch: PROC [i1: INT, len1: INT, i2: INT, len2: INT] RETURNS [BOOL] = TRUSTED {

WHILE len1 > 0 DO

c1: CHAR ← InlineFetch[pattern, i1];

IF c1 = '* THEN {

quick kill for * at end of pattern

IF len1 = 1 THEN RETURN [TRUE];

else must take all combinations

{-- first, accept the *

j1: INT ← i1 + 1;

nlen1: INT ← len1 - 1;

j2: INT ← i2;

nlen2: INT ← len2;

WHILE nlen2 >= 0 DO

IF submatch[j1, nlen1, j2, nlen2] THEN RETURN [TRUE];

j2 ← j2 + 1;

nlen2 ← nlen2 - 1;

ENDLOOP;

};

RETURN [FALSE];

};

IF len2 <= 0 THEN RETURN [FALSE];

at this point demand an exact match in both strings

{c2: CHAR ← InlineFetch[object, i2];

IF NOT case THEN {

IF c1 <= 'Z AND c1 >= 'A THEN c1 ← c1 + ('a-'A);

IF c2 <= 'Z AND c2 >= 'A THEN c2 ← c2 + ('a-'A);

};

IF c1 # c2 THEN RETURN [FALSE];

};

i1 ← i1 + 1;

len1 ← len1 - 1;

i2 ← i2 + 1;

len2 ← len2 - 1;

ENDLOOP;

RETURN [len2 = 0];

};

len1: INT ← InlineSize[pattern];

len2: INT ← InlineSize[object];

First, strip off the common tails until they differ (quick kill false), or the pattern has a * at the tail. This strip is easy, because we just decrement the lengths.

WHILE len1 > 0 DO

n: INT ← len1 - 1;

c1: CHAR ← InlineFetch[pattern, n];

c2: CHAR;

IF c1 = '* THEN EXIT;

IF len2 = 0 THEN RETURN [FALSE];

len1 ← n;

len2 ← len2 - 1;

c2 ← InlineFetch[object, len2];

IF NOT case THEN {

IF c1 <= 'Z AND c1 >= 'A THEN c1 ← c1 + ('a-'A);

IF c2 <= 'Z AND c2 >= 'A THEN c2 ← c2 + ('a-'A);

};

IF c1 # c2 THEN RETURN [FALSE];

ENDLOOP;

RETURN [submatch [0, len1, 0, len2]];

};

SkipTo: PUBLIC PROC [s: ROPE, pos: INT, skip: ROPE] RETURNS [INT] = TRUSTED {

... returns the lowest position N in s such that s[N] is in the skip string and N >= pos. If no such character occurs in s, then return s.length.

len: INT ← InlineSize[s];

skipText: Rope.Text = InlineFlatten[skip];

skiplen: NAT ← IF skipText = NIL THEN 0 ELSE skipText.length;

IF pos < len AND skiplen # 0 THEN {

CharMatch: PROC [c: CHAR] RETURNS [BOOL] = TRUSTED {

FOR i: NAT IN [0..skiplen) DO

IF c = skipText[i] THEN RETURN [TRUE];

ENDLOOP;

pos ← pos + 1;

RETURN [FALSE];

};

IF Map[s, pos, len - pos, CharMatch] THEN RETURN [pos];

};

RETURN [len];

};

SkipOver: PUBLIC PROC [s: ROPE, pos: INT, skip: ROPE] RETURNS [INT] = TRUSTED {

... return the lowest position N in s such that s[N] is NOT in the skip string and N >= pos. If no such character occurs in s, then return s.length.

len: INT ← InlineSize[s];

skipText: Rope.Text = InlineFlatten[skip];

skiplen: NAT ← IF skipText = NIL THEN 0 ELSE skipText.length;

IF pos >= len THEN RETURN [len];

IF skiplen # 0 THEN {

CharMatch: PROC [c: CHAR] RETURNS [BOOL] = TRUSTED {

FOR i: NAT IN [0..skiplen) DO

IF c = skipText[i] THEN GO TO found;

ENDLOOP;

RETURN [TRUE];

EXITS found => {pos ← pos + 1; RETURN [FALSE]};

};

IF Map[s, pos, len - pos, CharMatch] THEN RETURN [pos];

};

RETURN [pos];

};

VerifyStructure: PUBLIC PROC [s: ROPE] RETURNS [leaves,nodes,maxDepth: INT ← 0] = TRUSTED {

... traverses the structure of the given rope object; extra checking is performed to verify invariants.

IF s = NIL THEN RETURN;

WITH x: s SELECT FROM

text => {

leaves ← 1;

IF x.length > x.max THEN ERROR VerifyFailed};

node =>

WITH x: x SELECT FROM

substr => {

ref: ROPE ← x.base;

len1: INT ← Size[x.base];

IF x.start < 0 OR x.size <= 0 THEN ERROR VerifyFailed;

IF len1 < x.start + x.size THEN ERROR VerifyFailed;

[leaves, nodes, maxDepth] ← VerifyStructure[ref];

nodes ← nodes + 1};

concat => {

leaves1,nodes1,maxDepth1: INT;

left: ROPE ← x.base;

lSize: INT ← Size[left];

right: ROPE ← x.rest;

rSize: INT ← Size[right];

[leaves1, nodes1, maxDepth1] ← VerifyStructure[left];

[leaves, nodes, maxDepth] ← VerifyStructure[right];

leaves ← leaves + leaves1;

nodes ← nodes + nodes1 + 1;

IF maxDepth1 > maxDepth THEN maxDepth ← maxDepth1;

IF x.size # lSize + rSize THEN ERROR VerifyFailed;

IF x.pos # lSize THEN ERROR VerifyFailed;

};

replace => {

leaves1,nodes1,maxDepth1: INT;

old: ROPE ← x.base;

oldSize: INT ← Size[old];

repl: ROPE ← x.replace;

replSize: INT ← Size[repl];

[leaves, nodes, maxDepth] ← VerifyStructure[old];

[leaves1, nodes1, maxDepth1] ← VerifyStructure[repl];

leaves ← leaves + leaves1;

nodes ← nodes + nodes1 + 1;

IF maxDepth < maxDepth1 THEN maxDepth ← maxDepth1;

IF x.start > x.oldPos OR x.start > x.newPos THEN ERROR VerifyFailed;

IF x.newPos - x.start # replSize THEN ERROR VerifyFailed;

IF x.start < 0 OR x.start > x.size THEN ERROR VerifyFailed;

IF x.oldPos > oldSize THEN ERROR VerifyFailed;

};

object => {

leaves ← 1;

IF x.size < 0 OR x.fetch = NIL THEN ERROR VerifyFailed;

};

ENDCASE => ERROR NoRope;

maxDepth ← maxDepth + 1;

IF maxDepth # InlineDepth[s] THEN ERROR VerifyFailed;

};

VerifyFailed: PUBLIC ERROR = CODE;

***** New Balance implementation

This implementation is courtesy of Michael Plass (March 1985).

Stopper: TYPE = PROC [part: Part] RETURNS [BOOL];

Part: TYPE ~ RECORD [base: ROPE, start: INT, end: INT];

Must have 0 <= start <= end <= Size[base];

HeightArray: TYPE = ARRAY [0..RopePrivate.MaxDepth) OF INT;

minSizeForHeight: REF HeightArray ← NIL;

InitMinSizeForHeight: PROC ~ {

Initializes the height array. Does not need monitoring, since the array is completely initialized by the time the assignment occurs, and REF assignment is atomic. Races can only cause a little extra allocation.

IF minSizeForHeight = NIL THEN {

h: REF HeightArray ← NEW[HeightArray];

h[0] ← 0;

A NIL rope has no characters and height 0.

h[1] ← 1;

A flat rope ought to have at least one character.

h[2] ← Rope.FlatMax+1;

Must be at least this big to warrant any non-flat structure.

FOR i: NAT IN [3..RopePrivate.MaxDepth) DO

Use Fibonacci recurrence to compute rest.

Be careful about overflow here...

IF INT.LAST - h[i-1] < h[i-2]

THEN h[i] ← INT.LAST

ELSE h[i] ← h[i-1] + h[i-2];

ENDLOOP;

minSizeForHeight ← h;

};

PartIsBalanced: Stopper ~ {

Examines only the root.

size: INT ~ Rope.InlineSize[part.base];

height: INT ~ RopePrivate.InlineDepth[part.base];

IF part.start # 0

OR part.end # size

OR height >= RopePrivate.MaxDepth

THEN RETURN [FALSE];

IF minSizeForHeight = NIL THEN InitMinSizeForHeight[];

IF size < minSizeForHeight[height] THEN RETURN [FALSE];

WITH part.base SELECT FROM

substr: REF Rope.RopeRep.node.substr => RETURN [height<=1];

concat: REF Rope.RopeRep.node.concat => RETURN [TRUE];

replace: REF Rope.RopeRep.node.replace => RETURN [FALSE];

ENDCASE => RETURN [TRUE];

};

Balance: PUBLIC PROC [base: ROPE, start: INT ← 0, len: INT ← MaxLen, flat: INT ← FlatMax] RETURNS [ROPE] = {

This procedure is here mostly to match the new implementation against the old definition.

RETURN [Rope.Substr[NewBalance[base], start, len]];

};

NewBalance: PROC [rope: ROPE] RETURNS [ROPE] ~ {

ARep: TYPE ~ RECORD[index: INT𡤀, sub: ARRAY [0..d) OF AElement, rest: REF ARep←NIL];

AElement: TYPE ~ RECORD [base: ROPE, size: INT];

a: ARep; -- An extensible array that is very cheap if it is small.

accel: REF ARep ← NIL;

aN: INT ← 0;

d: NAT ~ 40;

StoreA: PROC [i: INT, e: AElement] ~ {

IF i-a.index < d

THEN a.sub[i-a.index] ← e

ELSE {

IF a.rest = NIL THEN {a.rest ← accel ← NEW[ARep]; accel.index ← d};

IF i < accel.index THEN accel ← a.rest;

WHILE i-accel.index >= d DO

IF accel.rest = NIL THEN {

accel.rest ← NEW[ARep];

accel.rest.index�l.index+d};

accel ← accel.rest;

ENDLOOP;

accel.sub[i-accel.index] ← e;

};

ASub: PROC [i: INT] RETURNS [e: AElement] ~ {

IF i-a.index < d

THEN e ← a.sub[i-a.index]

ELSE {

IF i < accel.index THEN accel ← a.rest;

WHILE i-accel.index >= d DO accel ← accel.rest ENDLOOP;

e ← accel.sub[i-accel.index];

};

SavePart: PROC [part: Part] ~ {

IF part.end > part.start THEN {

rope: ROPE ← Rope.Substr[part.base, part.start, part.end-part.start];

StoreA[aN, [rope, Rope.InlineSize[rope]]];

aN ← aN + 1;

};

BalanceRange: PROC [first: INT, end: INT, size: INT] RETURNS [ROPE] ~ {

Balances pieces [first..end), whose sizes must sum to size.

SELECT TRUE FROM

first = end => RETURN[NIL];

end-first = 1 => RETURN[ASub[first].base];

ENDCASE => {

i: INT ← first+1;

sizetoi: INT ← ASub[first].size;

FOR sizei: INT ← ASub[i].size, ASub[i].size

WHILE i < end-1 AND ((sizetoi+sizei)*2 < size OR ABS[sizetoi*2-size] > ABS[(sizetoi+sizei)*2-size]) DO

sizetoi ← sizetoi + sizei;

i ← i + 1;

ENDLOOP;

RETURN[Rope.Concat[BalanceRange[first, i, sizetoi], BalanceRange[i, end, size-sizetoi]]];

}

};

part: Part ~ [rope, 0, Rope.Size[rope]];

MapParts[part, SavePart, PartIsBalanced];

RETURN [BalanceRange[0, aN, part.end-part.start]]

};

BadPart: SIGNAL ~ CODE;

MapParts: PROC[part: Part, action: PROC[Part], stopDescent: Stopper←NIL] ~{

IF stopDescent#NIL AND stopDescent[part]

THEN action[part]

ELSE {

size: INT ~ Rope.InlineSize[part.base];

IF part.start < 0 OR part.end NOT IN [part.start..part.start+size] THEN ERROR BadPart;

WITH part.base SELECT FROM

substr: REF Rope.RopeRep.node.substr => {

MapParts[[substr.base, substr.start+part.start, substr.start+part.end], action, stopDescent];

};

concat: REF Rope.RopeRep.node.concat => {

IF part.start < concat.pos THEN {

MapParts[[concat.base, part.start, MIN[part.end, concat.pos]], action, stopDescent];

};

IF part.end > concat.pos THEN {

newStart: INT ← MAX[part.start-concat.pos, 0];

newEnd: INT ← part.end-concat.pos;

MapParts[[concat.rest, newStart, newEnd], action, stopDescent];

};

replace: REF Rope.RopeRep.node.replace => {

len1: INT ~ replace.start;

len2: INT ~ replace.newPos-replace.start;

len3: INT ~ replace.size-replace.newPos;

offset3: INT ~ replace.oldPos;

IF part.start < len1 THEN {

MapParts[[replace.base, part.start, MIN[part.end, len1]], action, stopDescent];

};

IF part.start < len1+len2 AND part.end > len1 THEN {

newStart: INT ~ MAX[part.start-len1, 0];

newEnd: INT ~ MIN[part.end-len1, len2];

MapParts[[replace.replace, newStart, newEnd], action, stopDescent];

};

IF part.end > len1+len2 THEN {

newStart: INT ← MAX[part.start-(len1+len2), 0]+offset3;

newEnd: INT ← MIN[part.end-(len1+len2), len3]+offset3;

MapParts[[replace.base, newStart, newEnd], action, stopDescent];

};

ENDCASE => action[part];

};

END.