RopeImpl.mesa, "Thick" string implementation
Copyright Ó 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993 by Xerox Corporation. All rights reserved.
Willie-sue, March 3, 1992 10:18 pm PST
Russ Atkinson (RRA) October 13, 1989 3:16:42 pm PDT
Christian Jacobi, October 5, 1992 5:57 pm PDT
Mike Spreitzer, June 5, 1990 1:32:27 pm PDT
Carl Hauser, July 10, 1990 9:32 am PDT
Doug Wyatt, November 22, 1991 5:06 pm PST
Michael Plass, September 13, 1993 8:55 am PDT
RopeImpl:
CEDAR
PROGRAM
IMPORTS Ascii, Basics, RefText, RopePrivate, SafeStorage
EXPORTS Rope
SHARES Rope
= BEGIN OPEN Rope, RopePrivate;
For those who care, this is the official explanation of the XRopeRep variants:
Note: NIL is allowed as a valid ROPE.
Note: ALL integer components of the representation must be non-negative.
SELECT x: x FROM
eightbit => {
SELECT x: x FROM
text => {
[0..x.length) is the range of char indexes
[0..x.max) is the number of chars of storage reserved
all Rope operations creating new text objects init x.length = x.max
x.length <= x.max is required
the bit pattern of the text IS IDENTICAL to TEXT and StringBody!!!!
};
node => {
x.length is ignored
x.depth is the depth of the structure (for triggering Balance)
SELECT x:x FROM
substr => {
[0..x.size) is the range of char indexes
x.base contains chars indexed by [0..x.size)
[0..x.size) in x ==> [x.start..x.start+x.size) in x.base
Size[x.base] >= x.start + x.size
};
concat => {
[0..x.size) is the range of char indexes
x.base contains chars indexed by [0..x.pos)
[0..x.pos) in x ==> [0..x.pos) in x.base
x.rest contains the chars indexed by [x.pos..x.size)
[x.pos..x.size) in x ==> [0..x.size-x.pos) in x.base
x.pos = Size[x.base] AND x.size = x.pos + Size[x.rest]
};
replace => {
[0..x.size) is the range of char indexes
x.base contains chars indexed by [0..x.start), [x.newPos..x.size)
[0..x.start) in x ==> [0..x.start) in x.base
[x.newPos..x.size) in x ==> [x.oldPos..Size[x.base]) in x.base
x.rest contains the chars indexed by [x.start..x.newPos)
[x.start..x.newPos) in x => [0..x.newPos-x.start) in x.base
x.size >= x.newPos >= x.start AND x.oldPos >= x.start
x.size - x.newPos = Size[x.base] - x.oldPos
};
object => {
[0..x.size) is the range of char indexes
x.base is the data needed by the user-supplied operations
x.fetch[x.base, i] should fetch the ith char AND x.fetch # NIL
x.map[x.base, st, len, action] implements Map[x, st, len, action]
x.move[block, x.base, st] implements MoveChars[block, x, st]
it is OK to have x.map = NIL OR x.move = NIL
};
ENDCASE => ERROR NoRope
}
ENDCASE => ERROR NoRope
};
extended => {
[0..x.size) is the range of char indexes
ext is for use by the extended-char-set superclass of ROPE.
}
ENDCASE => ERROR NoRope
charsPerWord: NAT = BYTES[WORD]/BYTES[CHAR];
RawCharsPtr: TYPE = POINTER TO Basics.RawChars;
WordPtr: TYPE = POINTER TO WORD;
errors peculiar to Rope
NoRope: PUBLIC ERROR = CODE;
emptyRope: Text =
NEW[TextRep[0]];
Watch out for startup problems with literals. Don't use "" here!
The following variables are here so Cirio could find ROPE and Text in the target world.
aXRope: XROPE ¬ NIL;
aRope: ROPE ¬ NIL;
aText: Rope.Text ¬ NIL;
untracedZone: ZONE ¬ NIL; -- initialized in RopeImpl's program body
NewText:
PUBLIC
PROC [size: TextBound]
RETURNS [text: Text] = {
procedure to allocate new Text objects
IF size = 0 THEN RETURN [emptyRope];
text ¬ untracedZone.NEW[TextRep[size]];
text.length ¬ size;
};
Substr:
PUBLIC
PROC [base:
ROPE, start:
INT ¬ 0, len:
INT ¬ MaxLen]
RETURNS [new:
ROPE] =
TRUSTED {
... returns a subrope of the base. BoundsFault occurs if start < 0 or start > Length[base].
size: INT ¬ InlineSize[base];
rem: INT ¬ NonNeg[size - NonNeg[start]];
depth: INTEGER ¬ 1;
IF len <= 0 THEN RETURN [emptyRope] ELSE IF len > rem THEN len ¬ rem;
IF start = 0 AND len = rem THEN RETURN [base];
IF len <= FlatMax THEN RETURN [Flatten[base, start, len]];
At this point the resulting rope is large enough to need a separate node. The idea is to dive down through as many levels of rope objects until we get to the deepest such object that fully contains the specified rope.
(note: change base last, since it is aliased with x!)
DO
WITH x: base
SELECT
FROM
text => EXIT;
node =>
WITH x: x
SELECT
FROM
substr => {
start ¬ start + x.start;
base ¬ x.base;
};
concat => {
rem: INT ¬ x.pos - start;
IF rem > 0
THEN {IF len > rem THEN {depth ¬ x.depth; EXIT}; base ¬ x.base}
ELSE {start ¬ -rem; base ¬ x.rest};
};
replace => {
len1: INT ¬ x.start - start;
IF len1 > 0
THEN {
substr starts in 1st section
IF len > len1 THEN {depth ¬ x.depth; EXIT};
entirely in first section, so go deeper
base ¬ x.base;
}
ELSE {
substr starts in middle or last sections
xnew: INT ¬ x.newPos;
len2: INT ¬ xnew - start;
IF len2 > 0
THEN {
substr starts in middle section
IF len > len2 THEN {depth ¬ x.depth; EXIT};
entirely in middle section
start ¬ -len1;
base ¬ x.replace;
}
ELSE {
entirely in last section
start ¬ x.oldPos - len2;
base ¬ x.base;
};
};
};
object =>
EXIT;
no sub-structure
ENDCASE => ERROR NoRope;
ENDCASE => ERROR NoRope;
IF start # 0 THEN LOOP;
IF len = InlineSize[base] THEN RETURN [base];
ENDLOOP;
[] ¬ NonNeg[start];
[] ¬ NonNeg[len];
new ¬
NEW[Tsubstr ¬
[eightbit[0, node[depth: depth + 1, cases: substr[size: len, base: base, start: start]]]]];
IF depth > MaxDepth THEN new ¬ NewBalance[new];
};
Cat:
PUBLIC
PROC [r1, r2, r3, r4, r5:
ROPE]
RETURNS [
ROPE] =
TRUSTED {
Return the concatenation of the given 5 ropes.
RETURN [Concat[Concat[r1, r2], Concat[Concat[r3, r4], r5]]];
};
Concat:
PUBLIC
PROC [base, rest:
ROPE]
RETURNS [
ROPE] = {
Return the concatenation of the two ropes. If the result is small enough, then it will be flat. Otherwise we need to create a new node.
IF rest = NIL THEN RETURN [base];
RETURN[SafeConcat[base, rest, TRUE]];
};
SafeConcat:
PROC [base, rest:
ROPE, checkBalance:
BOOL ¬
FALSE]
RETURNS [new:
ROPE] ~
TRUSTED {
This private version does not trigger a rebalancing unless asked; it is used inside of NewBalance to ensure that an infinite recursion does not happen.
baseStr, restStr: Text;
baseLen, restLen, size: INT;
depth: INTEGER ¬ 1;
IF rest = NIL THEN RETURN [base];
[baseLen, baseStr] ¬ SingleSize[base];
IF baseLen = 0 THEN RETURN [rest];
[restLen, restStr] ¬ SingleSize[rest];
IF restLen = 0 THEN RETURN [base];
size ¬ CheckLongAdd[baseLen,restLen];
IF size <= FlatMax
THEN {
The result is small enough to make it flat.
str: Text ¬ NewText[QShort[size]];
index: CARDINAL ¬ 0;
AddChar:
PROC [c:
CHAR]
RETURNS [
BOOL] =
TRUSTED {
QStore[c, str, index];
index ¬ index + 1;
RETURN [FALSE];
};
IF baseStr =
NIL
THEN [] ¬ Map[base, 0, baseLen, AddChar]
ELSE
FOR i: TextBound
IN [0..QShort[baseLen])
DO
QStore[QFetch[baseStr, i], str, index];
index ¬ index + 1;
ENDLOOP;
IF restStr =
NIL
THEN [] ¬ Map[rest, 0, restLen, AddChar]
ELSE
FOR i: TextBound
IN [0..QShort[restLen])
DO
QStore[QFetch[restStr, i], str, index];
index ¬ index + 1;
ENDLOOP;
RETURN [str];
};
SELECT
TRUE
FROM
restLen < FlatMax => {
Possibly can reduce depth by combining with a concat node.
WITH x: base
SELECT
FROM
node =>
WITH x: x
SELECT
FROM
concat =>
IF x.size-x.pos < FlatMax/2
THEN {
baseLen ¬ x.pos;
rest ¬ SafeConcat[x.rest, rest];
base ¬ x.base;
};
ENDCASE;
ENDCASE;
};
baseLen < FlatMax => {
Possibly can reduce depth by combining with a concat node.
WITH x: base
SELECT
FROM
node =>
WITH x: x
SELECT
FROM
concat =>
IF x.pos < FlatMax/2
THEN {
rest ¬ x.rest;
baseLen ¬ x.pos+baseLen;
base ¬ SafeConcat[base, x.base];
};
ENDCASE;
ENDCASE;
};
ENDCASE;
[] ¬ NonNeg[size-baseLen];
depth ¬ MAX[InlineDepth[base], InlineDepth[rest]] + 1;
new ¬
NEW[Tconcat ¬
[eightbit[0, node[depth: depth,
cases: concat[size: size, base: base, rest: rest, pos: baseLen]]]]];
IF checkBalance AND depth > MaxDepth THEN new ¬ NewBalance[new];
};
Replace:
PUBLIC
PROC [base:
ROPE, start:
INT ¬ 0, len:
INT ¬ MaxLen, with:
ROPE ¬
NIL]
RETURNS [new:
ROPE] =
TRUSTED {
baseSize: INT ¬ InlineSize[base];
repSize: INT ¬ InlineSize[with];
rem: INT ¬ NonNeg[baseSize - NonNeg[start]];
depth: INTEGER ¬ 0;
oldPos:
INT ¬ start +
(IF len < 0 THEN len ¬ 0 ELSE IF len > rem THEN len ¬ rem ELSE len);
newPos: INT ¬ CheckLongAdd[start,repSize];
size: INT ¬ CheckLongAdd[baseSize-len, repSize];
IF size = repSize THEN RETURN [with];
IF len = 0 AND repSize = 0 THEN RETURN [base]; -- identity check
IF size <= FlatMax
THEN {
result is small enough to be flat
str: Text ¬ NewText[QShort[size]];
index: TextBound ¬ 0;
AddChar:
PROC [c:
CHAR]
RETURNS [
BOOL] =
TRUSTED {
QStore[c, str, index];
index ¬ index + 1;
RETURN [FALSE];
};
IF start > 0 THEN [] ¬ Map[base, 0, start, AddChar];
IF repSize > 0 THEN [] ¬ Map[with, 0, repSize, AddChar];
IF oldPos < baseSize THEN [] ¬ Map[base, oldPos, baseSize, AddChar];
RETURN [str];
};
We need to make a new node. First, test for combining the replacement rope with a previous replacement rope, so that successive replacements at the same spot will not make ropes too deep.
(note: change base last, since it is aliased with x!)
WITH x: base
SELECT
FROM
node =>
WITH x: x
SELECT
FROM
replace => {
xnewPos: INT ¬ x.newPos;
xstart: INT ¬ x.start;
SELECT
TRUE
FROM
start <= xstart
AND oldPos >= xnewPos => {
replacing the replacement string
oldPos ¬ x.oldPos + (oldPos - xnewPos);
base ¬ x.base;
};
start = xnewPos => {
adding to old replace string
IF repSize + (xnewPos - xstart) <= FlatMax
THEN {
with ¬ Concat[x.replace, with];
start ¬ xstart;
oldPos ¬ x.oldPos + len;
base ¬ x.base;
};
};
ENDCASE;
};
ENDCASE;
ENDCASE;
[] ¬ NonNeg[NonNeg[newPos] - NonNeg[start]];
[] ¬ NonNeg[NonNeg[oldPos] - start];
[] ¬ NonNeg[NonNeg[size] - newPos];
depth ¬ MAX[InlineDepth[base], InlineDepth[with]] + 1;
new ¬
NEW[Treplace ¬
[eightbit[0, node[depth: depth, cases:
replace[size: size, base: base, replace: with, newPos: newPos, oldPos: oldPos, start: start]]]]];
IF depth > MaxDepth THEN new ¬ NewBalance[new];
};
InlineFetch:
PROC [base:
ROPE, index:
INT]
RETURNS [
CHAR] =
INLINE {
RETURN [Fetch[base, index]]
};
Fetch:
PUBLIC
PROC [base:
ROPE, index:
INT ¬ 0]
RETURNS [
CHAR] =
TRUSTED {
... fetches indexed character from given ropes. BoundsFault occurs if index < 0 or index is >= Length[base].
This first discrimination does the bounds checking and the quick kill for a flat rope.
WITH base
SELECT
FROM
text: Text => RETURN [text[index]];
node:
REF RopeRep.node => {
[] ¬ Basics.BoundsCheckInt[index, NodeSize[node]];
};
ENDCASE => Basics.RaiseBoundsFault[]; -- presumably NIL
Now we really don't need bounds checking, since checking the top level is sufficient. What we really need to do now is dive down to the right place as quickly as possible.
(note: change base last, since it is aliased with x!)
DO
WITH base
SELECT
FROM
x: REF TextRep => {RETURN[QFetch[x, index]]};
x:
REF RopeRep.node =>
WITH x: x
SELECT
FROM
substr => {index ¬ index + x.start; base ¬ x.base};
concat => {
IF index < x.pos THEN {base ¬ x.base; LOOP};
index ¬ index - x.pos;
base ¬ x.rest;
};
replace => {
IF index < x.start THEN {base ¬ x.base; LOOP};
IF index < x.newPos
THEN {
index ¬ index - x.start;
base ¬ x.replace;
LOOP;
};
index ¬ index - x.newPos + x.oldPos;
base ¬ x.base;
};
object => RETURN [x.fetch[x.base, index]];
ENDCASE => ERROR NoRope;
ENDCASE => ERROR NoRope;
ENDLOOP;
};
Map:
PUBLIC
PROC [base:
ROPE, start:
INT ¬ 0, len:
INT ¬ MaxLen, action: ActionType]
RETURNS [
BOOL] =
TRUSTED {
... applies the action to the given range of characters in the rope. Returns TRUE when some action returns TRUE. BoundsFault occurs when start < 0 or start > Length[base].
rem: INT ¬ NonNeg[InlineSize[base] - NonNeg[start]];
IF len > rem THEN len ¬ rem;
WHILE len > 0
DO
WITH x: base
SELECT
FROM
text => {
st: TextBound ¬ QShort[start];
FOR i: TextBound
IN [st..st+QShort[len])
DO
IF action[QFetch[@x,i]] THEN RETURN [TRUE];
ENDLOOP;
RETURN [FALSE];
};
node =>
(note: change base last, since it is aliased with x!)
WITH x: x
SELECT
FROM
substr => {start ¬ start + x.start; base ¬ x.base; LOOP};
concat => {
xpos: INT ¬ x.pos;
IF start+len <= xpos THEN {base ¬ x.base; LOOP};
IF start < xpos
THEN {
subLen: INT ¬ xpos-start;
IF Map[x.base, start, subLen, action] THEN RETURN [TRUE];
start ¬ xpos; len ¬ len - subLen;
};
start ¬ start - xpos;
base ¬ x.rest;
};
replace => {
xstart: INT ¬ x.start;
xnew: INT ¬ x.newPos;
IF start < xstart
THEN {
subLen: INT ¬ xstart-start;
IF subLen >= len THEN {base ¬ x.base; LOOP};
IF Map[x.base, start, subLen, action] THEN RETURN [TRUE];
start ¬ xstart;
len ¬ len - subLen;
};
IF start < xnew
THEN {
subLen: INT ¬ xnew-start;
st: INT ¬ start - xstart;
IF subLen >= len THEN {start ¬ st; base ¬ x.replace; LOOP};
IF Map[x.replace, st, subLen, action] THEN RETURN [TRUE];
start ¬ xnew;
len ¬ len - subLen;
};
start ¬ start - xnew + x.oldPos;
base ¬ x.base};
object => {
map: MapType ¬ x.map;
data: REF ¬ x.base;
IF map #
NIL
THEN RETURN[map[data, start, len, action]]
ELSE {
fetch: FetchType ¬ x.fetch;
FOR i:
INT
IN [start..start+len)
DO
IF action[fetch[data, i]] THEN RETURN [TRUE];
ENDLOOP;
RETURN [FALSE];
};
};
ENDCASE => ERROR NoRope;
ENDCASE => ERROR NoRope;
ENDLOOP;
RETURN [FALSE];
};
Translate:
PUBLIC
PROC [base:
ROPE, start:
INT ¬ 0, len:
INT ¬ MaxLen, translator: TranslatorType ¬
NIL]
RETURNS [new:
ROPE] =
TRUSTED {
applies the translation to get a new rope
if the resulting size > 0, then new does not share with the original rope!
if translator = NIL, the identity translation is performed
index: INT ¬ start;
intRem: INT ¬ NonNeg[InlineSize[base] - NonNeg[start]];
rem: INT ¬ intRem;
text: Text ¬ NIL;
IF len <= 0 OR rem = 0 THEN RETURN [emptyRope];
IF len < rem THEN rem ¬ len;
WITH base
SELECT
FROM
t: Text => {
short: TextBound ¬ index;
text ¬ NewText[rem];
FOR i: TextBound
IN [0..rem)
DO
c: CHAR ¬ QFetch[t, short];
IF translator # NIL THEN c ¬ translator[c];
text[i] ¬ c;
short ¬ short + 1;
ENDLOOP;
new ¬ text;
};
ENDCASE => {
each:
PROC
RETURNS [
CHAR] =
TRUSTED {
c: CHAR ¬ InlineFetch[base, index];
index ¬ index + 1;
IF translator # NIL THEN c ¬ translator[c];
RETURN [c];
};
RETURN [FromProc[rem, each]];
};
};
Lower:
PUBLIC TranslatorType ~ {
RETURN [Ascii.Lower[old]] };
Upper:
PUBLIC TranslatorType ~ {
RETURN [Ascii.Upper[old]] };
Flatten:
PUBLIC
PROC [base:
ROPE, start:
INT ¬ 0, len:
INT ¬ MaxLen]
RETURNS [Text] =
TRUSTED {
size: INT ¬ InlineSize[base];
rem: INT ¬ NonNeg[size - NonNeg[start]];
IF len > rem THEN len ¬ rem;
IF start = 0
AND len = rem
THEN {
IF base = NIL THEN RETURN [NIL];
IF base.tag = text THEN RETURN [LOOPHOLE[base]];
};
IF len <= 0
THEN RETURN [emptyRope]
ELSE {
rtn: Text = NewText[Short[len]];
rtn.length ¬ 0;
[] ¬ AppendChars[LOOPHOLE[rtn], base, start, len];
RETURN [rtn];
};
MakeRope:
PUBLIC
PROC
[base:
REF, size:
INT, fetch: FetchType, map: MapType, move: MoveType]
RETURNS [
ROPE] =
TRUSTED {
no optimization for user-supplied strings
IF size = 0 THEN RETURN [emptyRope];
RETURN [
NEW[Tobject ¬
[eightbit[0, node[depth: 1, cases: object[size: size, base: base, fetch: fetch, map: map, move: move]]]]]];
FromProc:
PUBLIC
PROC
[len:
INT, p:
PROC
RETURNS [
CHAR], maxPiece:
INT ¬ MaxLen]
RETURNS [
ROPE] =
TRUSTED {
IF len <= 0 THEN RETURN [emptyRope];
SELECT maxPiece
FROM
< FlatMax => maxPiece ¬ FlatMax;
> TextBound.LAST => maxPiece ¬ TextBound.LAST;
ENDCASE;
IF len <= maxPiece
THEN {
rtn: Text ~ NewText[QShort[len]];
FOR i: TextBound IN [0..QShort[len]) DO rtn[i] ¬ p[]; ENDLOOP;
RETURN [rtn];
}
ELSE {
Force proper evaluation order, since the compiler might get it backwards if left to its own devices.
left: ROPE ~ FromProc[len/2, p, maxPiece];
right: ROPE ~ FromProc[(len+1)/2, p, maxPiece];
RETURN [Concat[left, right]];
};
FromChars:
PUBLIC
PROC [genChars:
PROC [
PROC [
CHAR]]]
RETURNS [
ROPE] ~ {
genRopesFromChars:
PROC [putRope:
PROC [
ROPE]] ~ {
buf: REF TEXT ~ RefText.ObtainScratch[512];
putChar:
PROC [c:
CHAR] ~ {
text: REF TEXT ~ buf;
len: NAT ¬ text.length;
IF NOT len<text.maxLength THEN { putRope[FromRefText[text]]; len ¬ 0; };
text[len] ¬ c;
text.length ¬ len+1;
};
genChars[putChar];
IF buf.length>0 THEN putRope[FromRefText[buf]];
RefText.ReleaseScratch[buf];
};
RETURN[FromRopes[genRopesFromChars]];
};
FromRopes:
PUBLIC
PROC [genRopes:
PROC [
PROC [
ROPE]]]
RETURNS [result:
ROPE ¬
NIL] ~ {
stackSize: NAT ~ 10;
s: ARRAY [0..stackSize) OF ROPE ¬ ALL[NIL];
putRope:
PROC [rope:
ROPE] ~ {
FOR i:
NAT
DECREASING
IN [0..stackSize)
DO
IF s[i] = NIL THEN { s[i] ¬ rope; RETURN }
ELSE { rope ¬ Concat[s[i], rope]; s[i] ¬ NIL };
ENDLOOP;
s[0] ¬ rope;
};
genRopes[putRope];
FOR i:
NAT
DECREASING
IN [0..stackSize)
DO
IF s[i] # NIL THEN result ¬ Concat[s[i], result];
ENDLOOP;
};
fromChars: ARRAY CHAR OF Text ¬ ALL[NIL];
FromChar:
PUBLIC
PROC [c:
CHAR]
RETURNS [Text] =
TRUSTED {
rtn: Text ¬ fromChars[c];
IF rtn=
NIL
THEN {
rtn ¬ NewText[1];
rtn[0] ¬ c;
fromChars[c] ¬ rtn; --ok not to monitor
};
RETURN [rtn];
};
MakeConstantRope:
PUBLIC
PROC [char:
CHAR, length:
INT]
RETURNS [
ROPE] = {
RETURN MakeRope[NEW [Constant ¬ [char, length]], length, ConstFetch, ConstMap, ConstMove]};
Constant:
TYPE =
RECORD [char:
CHAR, length:
INT];
ConstFetch:
PROC [data:
REF, index:
INT]
RETURNS [
CHAR] = {
c: REF Constant = NARROW[data];
RETURN [c.char]};
ConstMap:
PROC [base:
REF, start, len:
INT, action: ActionType]
RETURNS [quit:
BOOL ¬
FALSE] = {
c: REF Constant = NARROW[base];
THROUGH [1 ..
MIN[c.length-start, len]]
DO
IF action[c.char] THEN RETURN [TRUE];
ENDLOOP;
RETURN [FALSE]};
ConstMove:
UNSAFE
PROC [block: Basics.UnsafeBlock, data:
REF, start:
INT]
RETURNS [charsMoved:
INT ¬ 0] =
UNCHECKED {
c: REF Constant = NARROW[data];
value: BYTE = ORD[c.char];
n: INT = MIN[c.length-start, block.count];
WHILE charsMoved<n
DO
count: CARDINAL ~ MIN[n-charsMoved, CARD16.LAST];
Basics.FillBytes[dstBase: block.base, dstStart: block.startIndex+charsMoved, count: count, value: value];
charsMoved ¬ charsMoved+count;
ENDLOOP;
};
FromRefText:
PUBLIC
PROC
[s:
REF
READONLY
TEXT, start: TextBound ¬ 0, len: TextBound ¬ TextBound.
LAST]
RETURNS [rtn: Text ¬
NIL] =
TRUSTED {
IF s #
NIL
THEN
TRUSTED {
rem: TextBound ~ s.length-start;
IF rem<len THEN len ¬ rem;
IF len = 0 THEN RETURN [emptyRope];
rtn ¬ NewText[len];
Basics.CopyBytes[
dstBase: RefText.BaseFromTextRope[rtn], dstStart: 0,
srcBase: RefText.BaseFromText[s], srcStart: start,
count: len];
};
ToRefText:
PUBLIC
PROC [base:
ROPE]
RETURNS [
REF
TEXT] =
TRUSTED {
len: TextBound ¬ Short[InlineSize[base]];
rtn: REF TEXT ¬ untracedZone.NEW[TEXT[len]];
IF len # 0
THEN {
WITH base
SELECT
FROM
txt: Text =>
TRUSTED {
Basics.CopyBytes[
dstBase: RefText.BaseFromText[rtn], dstStart: 0,
srcBase: RefText.BaseFromTextRope[txt], srcStart: 0,
count: len];
rtn.length ¬ len;
};
ENDCASE =>
Sigh, do it the hard way.
[] ¬ AppendChars[rtn, base, 0, len];
};
RETURN [rtn];
};
DoMoveChars:
UNSAFE
PROC
[pointer:
POINTER
TO Basics.RawBytes, index:
INT, rope:
ROPE, start:
INT, len:
INT] ~ {
We assume start IN[0..size] AND len IN[0..size-start], where size=Size[rope]
The effect is ... FOR i: INT IN[0..len) DO pointer[index+i] ← rope.Fetch[start+i] ENDLOOP
WHILE len # 0
DO
base: ROPE; bStart, bLen: INT;
[base, bStart, bLen] ¬ ContainingPiece[rope, start];
grab the smallest piece of the rope containing the given start index
IF bLen > len
THEN bLen ¬ len;
this piece is longer than we need to finish the transfer
IF bLen = 0
THEN
ERROR NoRope;
this should not happen! it means that some calculation or invariant is bad!
WITH base
SELECT
FROM
txt: Text =>
TRUSTED {
We are moving chars from a flat rope.
Basics.CopyBytes[
dstBase: pointer, dstStart: index,
srcBase: RefText.BaseFromTextRope[txt], srcStart: bStart,
count: bLen];
};
n:
REF RopeRep.node => {
WITH n
SELECT
FROM
obj:
REF RopeRep.node.object =>
We are moving chars from a user-defined object.
SELECT
TRUE
FROM
obj.move #
NIL =>
TRUSTED {
The user has supplied a fast move routine, so use it.
moved: INT ¬ obj.move[[pointer, index, bLen], obj.base, bStart];
IF moved # bLen
THEN
ERROR NoRope;
We asked for a perfectly legitimate # of chars, and the user's move routine blew it! This rope is not reliable.
};
obj.map #
NIL => {
The user has supplied a fast map routine, so use it.
toPointer: RawCharsPtr ¬ LOOPHOLE[pointer];
toIndex: INT ¬ index;
moved: INT ¬ 0;
action: ActionType =
TRUSTED {
IF toIndex > TextBound.
LAST
THEN {
toPointer ¬ toPointer+(toIndex/charsPerWord)*UNITS[WORD];
toIndex ¬ toIndex MOD charsPerWord;
};
toPointer[toIndex] ¬ c;
toIndex ¬ toIndex + 1;
moved ¬ moved + 1;
};
[] ¬ obj.map[obj.base, bStart, bLen, action];
IF moved # bLen
THEN
ERROR NoRope;
We asked for a perfectly legitimate # of chars, and the user's map routine blew it! This rope is not reliable.
};
ENDCASE =>
TRUSTED {
Sigh. We have to do this one ourselves.
toPointer: RawCharsPtr ¬ LOOPHOLE[pointer];
toIndex: INT ¬ index;
fetch: FetchType ¬ obj.fetch;
data: REF ¬ obj.base;
FOR i:
INT
IN [0..bLen)
DO
IF toIndex > TextBound.
LAST
THEN {
toPointer ¬ toPointer+(toIndex/charsPerWord)*UNITS[WORD];
toIndex ¬ toIndex MOD charsPerWord;
};
toPointer[toIndex] ¬ fetch[data, bStart+i];
toIndex ¬ toIndex + 1;
ENDLOOP;
};
ENDCASE => ERROR NoRope; -- this should not happen!
};
ENDCASE => ERROR NoRope; -- this should not happen!
len ¬ len - bLen;
update the # of chars left in the transfer
start ¬ start + bLen;
start is now the starting index for the next piece
index ¬ index + bLen;
index is now the destination index for the next piece
ENDLOOP;
};
AppendChars:
PUBLIC
PROC
[buffer:
REF
TEXT, rope:
ROPE, start:
INT ¬ 0, len:
INT ¬
LAST[
INT]]
RETURNS [charsMoved: TextBound ¬ 0] = {
... appends characters to the end of a REF TEXT buffer, starting at start within the rope. The move stops if there are no more characters from the rope OR len characters have been moved OR the buffer is full (buffer.length = buffer.maxLength). charsMoved is always the # of characters appended. NOTE: the user is responsible for protecting buffer from concurrent modifications.
rem:
INT ¬ NonNeg[InlineSize[rope]-NonNeg[start]];
# of characters in rope after start
IF rem > len
THEN rem ¬ len;
The user may have specified a shorter run of characters
IF rem > 0
AND buffer #
NIL
THEN
TRUSTED {
bufPos: TextBound ¬ buffer.length;
position of next place to append character (cache for buffer.length)
bufRem: TextBound ¬ buffer.maxLength - bufPos;
# of chars remaining in the transfer
IF bufRem > rem
THEN bufRem ¬ QShort[rem];
the caller may have specified a shorter amount
DoMoveChars[
pointer: RefText.BaseFromText[buffer],
index: bufPos, rope: rope, start: start, len: bufRem];
buffer.length ¬ bufPos+bufRem;
RETURN [bufRem];
};
};
UnsafeMoveChars:
PUBLIC
UNSAFE
PROC [block: UnsafeBlock, rope:
ROPE, start:
INT ¬ 0]
RETURNS [charsMoved:
INT ¬ 0] ~ {
... moves characters to an UnsafeBlock, starting at start within the rope. The move stops if there are no more characters from the rope OR block.count characters have been moved; charsMoved is always the # of characters moved. This UNSAFE operation is mainly for the implementation of IO.RIS. Most clients should use AppendChars.
rem:
INT ¬ NonNeg[InlineSize[rope]-NonNeg[start]];
# of characters in rope after start
IF rem > block.count
THEN rem ¬ block.count;
The user may have specified a shorter run of characters
IF rem > 0
THEN
TRUSTED {
DoMoveChars[
pointer: block.base,
index: NonNeg[block.startIndex], rope: rope, start: start, len: rem];
RETURN [rem];
};
};
Equal:
PUBLIC
PROC [s1, s2:
ROPE ¬
NIL, case:
BOOL ¬
TRUE]
RETURNS [
BOOL] =
TRUSTED {
contents equality of s1 and s2
len1,len2: INT;
str1, str2: Text;
[len1, str1] ¬ SingleSize[s1];
[len2, str2] ¬ SingleSize[s2];
SELECT
TRUE
FROM
len1 # len2 => RETURN [FALSE];
s1 = s2 OR len1 = 0 => RETURN [TRUE];
case
AND str1 #
NIL
AND str2 #
NIL => {
relatively cheap test for equality
len: TextBound ¬ QShort[len1];
wp1: WordPtr ¬ LOOPHOLE[str1, WordPtr] + SIZE[TEXT[0]];
wp2: WordPtr ¬ LOOPHOLE[str2, WordPtr] + SIZE[TEXT[0]];
WHILE len >= charsPerWord
DO
Run through the words first
IF wp1 # wp2 THEN RETURN [FALSE];
wp1 ¬ wp1 + UNITS[WORD];
wp2 ¬ wp2 + UNITS[WORD];
len ¬ len - charsPerWord;
ENDLOOP;
FOR i: TextBound
IN [0..len
MOD charsPerWord)
DO
Run through the remaining characters
IF
LOOPHOLE[wp1, RawCharsPtr][i] #
LOOPHOLE[wp2, RawCharsPtr][i]
THEN
RETURN [FALSE];
ENDLOOP;
RETURN [TRUE];
};
ENDCASE =>
RETURN [Compare[s1,s2,case] = equal];
};
Compare:
PUBLIC
PROC [s1, s2:
ROPE ¬
NIL, case:
BOOL ¬
TRUE]
RETURNS [Comparison] =
TRUSTED {
contents comparison of s1 and s2 (less(0), equal(1), greater(2))
if NOT case, then upper case chars = lower case chars
contents equality of s1 and s2
len1,len2: INT;
str1, str2: Text;
[len1, str1] ¬ SingleSize[s1];
[len2, str2] ¬ SingleSize[s2];
IF str1 #
NIL
AND str2 #
NIL
THEN {
The two ropes are both flat, so we don't have to discriminate for every character.
sz1: TextBound ¬ QShort[len1];
sz2: TextBound ¬ QShort[len2];
sz: TextBound ¬ MIN[sz1, sz2];
IF case
THEN
FOR i: TextBound
IN [0..sz)
DO
c1: CHAR ¬ QFetch[str1, i];
c2: CHAR ¬ QFetch[str2, i];
IF c1 = c2 THEN LOOP;
IF c1 < c2 THEN RETURN [less] ELSE RETURN [greater];
ENDLOOP
ELSE
FOR i: TextBound
IN [0..sz)
DO
c1: CHAR ¬ QFetch[str1, i];
c2: CHAR ¬ QFetch[str2, i];
IF c1 <= 'Z AND c1 >= 'A THEN c1 ¬ c1 + ('a-'A);
IF c2 <= 'Z AND c2 >= 'A THEN c2 ¬ c2 + ('a-'A);
IF c1 = c2 THEN LOOP;
IF c1 < c2 THEN RETURN [less] ELSE RETURN [greater];
ENDLOOP;
IF sz1 > sz2 THEN RETURN [greater];
IF sz1 < sz2 THEN RETURN [less];
RETURN [equal];
};
{
At least one rope is not flat, so we do it the hard way.
r1,r2: ROPE ¬ NIL;
pos1,st1,sz1,lm1: INT ¬ 0;
pos2,st2,sz2,lm2: INT ¬ 0;
c1,c2: CHAR;
DO
IF st1 = lm1
THEN {
need a new piece from s1
IF (pos1 ¬ pos1 + sz1) = len1
THEN RETURN [IF pos1 = len2 THEN equal ELSE less];
[r1, st1, sz1] ¬ ContainingPiece[s1, pos1];
IF sz1 = 0 THEN ERROR;
lm1 ¬ st1 + sz1;
};
IF st2 = lm2
THEN {
need a new piece from s2
IF (pos2 ¬ pos2 + sz2) = len2 THEN RETURN [greater];
[r2, st2, sz2] ¬ ContainingPiece[s2, pos2];
IF sz2 = 0 THEN ERROR;
lm2 ¬ st2 + sz2;
};
c1 ¬ InlineFetch[r1, st1];
c2 ¬ InlineFetch[r2, st2];
IF
NOT case
THEN {
IF c1 <= 'Z AND c1 >= 'A THEN c1 ¬ c1 + ('a-'A);
IF c2 <= 'Z AND c2 >= 'A THEN c2 ¬ c2 + ('a-'A);
};
IF c1 # c2 THEN RETURN [IF c1 < c2 THEN less ELSE greater];
st1 ¬ st1 + 1; st2 ¬ st2 + 1;
ENDLOOP;
};
CompareSubstrs:
PUBLIC
PROC [
s1:
ROPE, start1:
INT ¬ 0, len1:
INT ¬ MaxLen,
s2:
ROPE, start2:
INT ¬ 0, len2:
INT ¬ MaxLen,
case:
BOOL ¬
TRUE]
RETURNS [Comparison] ~ {
rem1: INT ¬ IF len1 <= 0 THEN 0 ELSE MIN[len1, NonNeg[InlineSize[s1]-NonNeg[start1]]];
rem2: INT ¬ IF len2 <= 0 THEN 0 ELSE MIN[len2, NonNeg[InlineSize[s2]-NonNeg[start2]]];
rem: INT ¬ MIN[rem1, rem2];
r1, r2: ROPE ¬ NIL;
st1, sz1, lm1: INT ¬ 0;
st2, sz2, lm2: INT ¬ 0;
WHILE rem # 0
DO
IF st1 = lm1
THEN {
need a new piece from s1
[r1, st1, sz1] ¬ ContainingPiece[s1, start1 ¬ start1 + sz1];
IF sz1 = 0 THEN EXIT;
lm1 ¬ st1 + sz1;
};
IF st2 = lm2
THEN {
need a new piece from s2
[r2, st2, sz2] ¬ ContainingPiece[s2, start2 ¬ start2 + sz2];
IF sz2 = 0 THEN EXIT;
lm2 ¬ st2 + sz2;
};
{
c1: CHAR ¬ InlineFetch[r1, st1];
c2: CHAR ¬ InlineFetch[r2, st2];
IF c1 # c2
THEN {
IF
NOT case
THEN {
IF c1 <= 'Z AND c1 >= 'A THEN c1 ¬ c1 + ('a-'A);
IF c2 <= 'Z AND c2 >= 'A THEN c2 ¬ c2 + ('a-'A);
IF c1 = c2 THEN GO TO equal;
};
RETURN [Basics.CompareInt[ORD[c1], ORD[c2]]];
EXITS equal => {};
};
};
st1 ¬ st1 + 1;
st2 ¬ st2 + 1;
rem ¬ rem - 1;
ENDLOOP;
RETURN [Basics.CompareInt[rem1, rem2]];
};
EqualSubstrs:
PUBLIC
PROC [
s1:
ROPE, start1:
INT ¬ 0, len1:
INT ¬ MaxLen,
s2:
ROPE, start2:
INT ¬ 0, len2:
INT ¬ MaxLen,
case:
BOOL ¬
TRUE]
RETURNS [
BOOL] ~ {
rem1: INT ¬ IF len1 <= 0 THEN 0 ELSE MIN[len1, NonNeg[InlineSize[s1]-NonNeg[start1]]];
rem2: INT ¬ IF len2 <= 0 THEN 0 ELSE MIN[len2, NonNeg[InlineSize[s2]-NonNeg[start2]]];
IF rem1 = rem2
THEN {
r1, r2: ROPE ¬ NIL;
st1, sz1, lm1: INT ¬ 0;
st2, sz2, lm2: INT ¬ 0;
WHILE rem1 # 0
DO
IF st1 = lm1
THEN {
need a new piece from s1
[r1, st1, sz1] ¬ ContainingPiece[s1, start1 ¬ start1 + sz1];
IF sz1 = 0 THEN EXIT;
lm1 ¬ st1 + sz1;
};
IF st2 = lm2
THEN {
need a new piece from s2
[r2, st2, sz2] ¬ ContainingPiece[s2, start2 ¬ start2 + sz2];
IF sz2 = 0 THEN EXIT;
lm2 ¬ st2 + sz2;
};
{
c1: CHAR ¬ InlineFetch[r1, st1];
c2: CHAR ¬ InlineFetch[r2, st2];
IF c1 # c2
THEN {
IF
NOT case
THEN {
IF c1 <= 'Z AND c1 >= 'A THEN c1 ¬ c1 + ('a-'A);
IF c2 <= 'Z AND c2 >= 'A THEN c2 ¬ c2 + ('a-'A);
IF c1 = c2 THEN GO TO equal;
};
RETURN [FALSE];
EXITS equal => {};
};
};
st1 ¬ st1 + 1;
st2 ¬ st2 + 1;
rem1 ¬ rem1 - 1;
ENDLOOP;
RETURN [TRUE];
};
RETURN [FALSE];
};
ContainingPiece:
PUBLIC
PROC [rope:
ROPE, index:
INT ¬ 0]
RETURNS [base:
ROPE, start:
INT, len:
INT] =
TRUSTED {
find the largest piece containg the given index
such that the resulting rope is either the text or the object variant
(NIL, 0, 0) is returned if the index is NOT in the given rope
len ¬ InlineSize[rope];
IF index < 0 OR index >= len THEN RETURN [NIL, 0, 0];
base ¬ rope;
start ¬ index;
len ¬ len - start;
DO
nlen: INT ¬ len;
WITH x: base
SELECT
FROM
(note: change base last, since it is aliased with x!)
text => RETURN;
node =>
WITH x: x
SELECT
FROM
substr => {
nlen ¬ x.size - start;
start ¬ start + x.start;
base ¬ x.base;
};
concat => {
del1: INT ¬ x.pos - start;
IF del1 > 0
THEN {nlen ¬ del1; base ¬ x.base}
ELSE {nlen ¬ x.size - start; start ¬ -del1; base ¬ x.rest};
};
replace => {
del2: INT ¬ x.newPos - start;
del1: INT ¬ x.start - start;
SELECT
TRUE
FROM
del1 > 0 => {nlen ¬ del1; base ¬ x.base};
del2 > 0 => {start ¬ -del1; nlen ¬ del2; base ¬ x.replace};
ENDCASE => {
nlen ¬ x.size - start;
start ¬ x.oldPos - del2;
base ¬ x.base;
};
};
object => {RETURN};
ENDCASE => ERROR NoRope;
ENDCASE => ERROR NoRope;
IF nlen < len THEN len ¬ NonNeg[nlen];
ENDLOOP;
};
IsEmpty:
PUBLIC
PROC [r:
XROPE]
RETURNS [
BOOL] = {
RETURN [Size[r]=0];
};
Length:
PUBLIC
PROC [base:
XROPE]
RETURNS [
INT] = {
RETURN [Size[base]];
};
InlineSize:
PROC [base:
ROPE]
RETURNS [
INT] = {
SELECT
TRUE
FROM
base = NIL => RETURN [0];
base.tag = text => RETURN [base.length];
ENDCASE =>
TRUSTED {
Relys on size field being in the same place in all node variants:
RETURN [LOOPHOLE[base, REF Tobject].size];
};
};
Size:
PUBLIC
PROC [base:
XROPE]
RETURNS [
INT] = {
WITH base
SELECT
FROM
base:
REF XRopeRep.eightbit => {
IF base.tag = text THEN RETURN [base.length];
TRUSTED {
Relys on size field being in the same place in all node variants:
RETURN [LOOPHOLE[base, REF Tobject].size];
};
};
base: REF XRopeRep.extended => RETURN [base.size];
ENDCASE => RETURN [0];
};
QShort:
PROC [x:
INT]
RETURNS [TextBound] =
TRUSTED
INLINE {
This "routine" is meant to convert an INT to a TextBound without checking in cases where the INT is known from previous tests to be in bounds. Use Short to actually perform the checking.
RETURN [LOOPHOLE[x]];
};
Formerly in RopeImplExt
Run:
PUBLIC
PROC
[s1:
ROPE, pos1:
INT, s2:
ROPE, pos2:
INT, case:
BOOL ¬
TRUE, len:
INT ¬ MaxLen]
RETURNS [result:
INT ¬ 0] =
TRUSTED {
Returns the largest number of characters N <= len such that s1 starting at pos1 is equal to s2 starting at pos2 for N characters. More formally: FOR i IN [0..N): s1[pos1+i] = s2[pos2+i]. If case is true, then case matters, otherwise upper and lower case characters are considered equal.
len1: INT;
str1: Text;
[len1, str1] ¬ SingleSize[s1];
IF NonNeg[pos1] < len1
THEN {
len2: INT;
str2: Text;
[len2, str2] ¬ SingleSize[s2];
IF NonNeg[pos2] < len2
THEN {
r1,r2: ROPE ¬ NIL;
st1,sz1,lm1: INT ¬ 0;
st2,sz2,lm2: INT ¬ 0;
WHILE result<len
DO
IF st1 = lm1
THEN {
need a new piece from s1
[r1, st1, sz1] ¬ ContainingPiece[s1, pos1 ¬ pos1 + sz1];
IF sz1 = 0 THEN RETURN;
lm1 ¬ st1 + sz1;
};
IF st2 = lm2
THEN {
need a new piece from s2
[r2, st2, sz2] ¬ ContainingPiece[s2, pos2 ¬ pos2 + sz2];
IF sz2 = 0 THEN RETURN;
lm2 ¬ st2 + sz2;
};
{
c1: CHAR ¬ InlineFetch[r1, st1];
c2: CHAR ¬ InlineFetch[r2, st2];
IF c1 # c2
THEN {
IF
NOT case
THEN {
IF c1 <= 'Z AND c1 >= 'A THEN c1 ¬ c1 + ('a-'A);
IF c2 <= 'Z AND c2 >= 'A THEN c2 ¬ c2 + ('a-'A);
IF c1 = c2 THEN GO TO equal;
};
RETURN;
EXITS equal => {};
};
};
result ¬ result + 1;
st1 ¬ st1 + 1; st2 ¬ st2 + 1;
ENDLOOP;
};
};
IsPrefix:
PUBLIC
PROC [prefix:
ROPE, subject:
ROPE, case:
BOOL ¬
TRUE]
RETURNS [
BOOL] ~ {
RETURN [Run[prefix, 0, subject, 0, case]=InlineSize[prefix]];
};
Index:
PUBLIC
PROC
[s1:
ROPE, pos1:
INT, s2:
ROPE, case:
BOOL ¬
TRUE]
RETURNS [
INT] =
TRUSTED {
Returns the smallest character position N such that s2 occurs in s1 at N and N >= pos1. If s2 does not occur in s1 at or after pos1, s1.length is returned. pos1 <= N < s1.length => FOR i IN [0..s2.length): s1[N+i] = s2[i]; N = s1.length => s2 does not occur in s1. If case is true, then case matters, otherwise upper and lower case characters are considered equal.
len1,len2, rem: INT; both: BOOL;
[len1,len2,both] ¬ DoubleSize[s1, s2];
rem ¬ IF pos1 >= len1 THEN 0 ELSE len1 - NonNeg[pos1];
IF rem >= len2
THEN {
c: CHAR;
IF len2 = 0 THEN RETURN [pos1];
c ¬ InlineFetch[s2, 0];
rem ¬ rem - len2 + 1;
IF case
THEN {
Cmp:
PROC [cc:
CHAR]
RETURNS [
BOOL] =
TRUSTED {
IF c = cc AND Run[s1, pos1+1, s2, 1, case]+1 = len2 THEN RETURN [TRUE];
pos1 ¬ pos1 + 1; RETURN [FALSE];
};
IF Map[s1, pos1, rem, Cmp] THEN RETURN [pos1]
}
ELSE {
LCmp:
PROC [cc:
CHAR]
RETURNS [
BOOL] =
TRUSTED {
IF cc <= 'Z AND cc >= 'A THEN cc ¬ cc + ('a-'A);
IF c = cc AND Run[s1, pos1+1, s2, 1, case]+1 = len2 THEN RETURN [TRUE];
pos1 ¬ pos1 + 1; RETURN [FALSE];
};
IF c <= 'Z AND c >= 'A THEN c ¬ c + ('a-'A);
IF Map[s1, pos1, rem, LCmp] THEN RETURN [pos1];
};
};
RETURN [len1];
};
Find:
PUBLIC
PROC [s1, s2:
ROPE, pos1:
INT ¬ 0, case:
BOOL ¬
TRUE]
RETURNS [
INT] = {
index: INT ¬ Index[s1, pos1, s2, case];
IF index = InlineSize[s1] THEN RETURN [-1];
RETURN [index];
};
FindBackward:
PUBLIC
PROC
[s1, s2:
ROPE, pos1:
INT ¬ MaxLen, case:
BOOL ¬
TRUE]
RETURNS [
INT] = {
len1,len2: INT; both: BOOL;
[len1,len2,both] ¬ DoubleSize[s1, s2];
IF NonNeg[pos1]>len1 THEN pos1 ¬ len1;
IF len2 = 0 THEN RETURN [pos1];
IF len1 >= len2
THEN {
c2: CHAR ¬ InlineFetch[s2, 0]; -- first char of pattern
rem2: INT ~ len2-1; -- remaining chars in pattern
IF (len1-pos1)<len2 THEN pos1 ¬ len1-len2;
IF case
THEN {
FOR i:
INT
DECREASING
IN[0..pos1]
DO
c1: CHAR ~ InlineFetch[s1, i];
IF c1=c2 AND (rem2=0 OR Run[s1, i+1, s2, 1, case]=rem2) THEN RETURN [i];
ENDLOOP;
}
ELSE {
IF c2 <= 'Z AND c2 >= 'A THEN c2 ¬ c2 + ('a-'A);
FOR i:
INT
DECREASING
IN[0..pos1]
DO
c1: CHAR ¬ InlineFetch[s1, i];
IF c1 <= 'Z AND c1 >= 'A THEN c1 ¬ c1 + ('a-'A);
IF c1=c2 AND (rem2=0 OR Run[s1, i+1, s2, 1, case]=rem2) THEN RETURN [i];
ENDLOOP;
};
};
RETURN [-1];
};
Match:
PUBLIC
PROC [pattern, object:
ROPE, case:
BOOL ¬
TRUE]
RETURNS [
BOOL] =
TRUSTED {
Returns TRUE if the object matches the pattern, where the pattern may contain * to indicate that 0 or more characters will match. Returns FALSE otherwise. For example, using a*b as a pattern, some matching objects are: ab, a#b, a###b, and some not matching objects: abc, cde, bb, a, Ab. If case is true, then case matters, otherwise upper and lower case characters are considered equal.
submatch:
PROC [i1:
INT, len1:
INT, i2:
INT, len2:
INT]
RETURNS [
BOOL] =
TRUSTED {
WHILE len1 > 0
DO
c1: CHAR ¬ InlineFetch[pattern, i1];
IF c1 = '*
THEN {
quick kill for * at end of pattern
IF len1 = 1 THEN RETURN [TRUE];
else must take all combinations
{
-- first, accept the *
j1: INT ¬ i1 + 1;
nlen1: INT ¬ len1 - 1;
j2: INT ¬ i2;
nlen2: INT ¬ len2;
WHILE nlen2 >= 0
DO
IF submatch[j1, nlen1, j2, nlen2] THEN RETURN [TRUE];
j2 ¬ j2 + 1;
nlen2 ¬ nlen2 - 1;
ENDLOOP;
};
RETURN [FALSE];
};
IF len2 <= 0 THEN RETURN [FALSE];
at this point demand an exact match in both strings
{c2:
CHAR ¬ InlineFetch[object, i2];
IF
NOT case
THEN {
IF c1 <= 'Z AND c1 >= 'A THEN c1 ¬ c1 + ('a-'A);
IF c2 <= 'Z AND c2 >= 'A THEN c2 ¬ c2 + ('a-'A);
};
IF c1 # c2 THEN RETURN [FALSE];
};
i1 ¬ i1 + 1;
len1 ¬ len1 - 1;
i2 ¬ i2 + 1;
len2 ¬ len2 - 1;
ENDLOOP;
RETURN [len2 = 0];
};
len1: INT ¬ InlineSize[pattern];
len2: INT ¬ InlineSize[object];
First, strip off the common tails until they differ (quick kill false), or the pattern has a * at the tail. This strip is easy, because we just decrement the lengths.
WHILE len1 > 0
DO
n: INT ¬ len1 - 1;
c1: CHAR ¬ InlineFetch[pattern, n];
c2: CHAR;
IF c1 = '* THEN EXIT;
IF len2 = 0 THEN RETURN [FALSE];
len1 ¬ n;
len2 ¬ len2 - 1;
c2 ¬ InlineFetch[object, len2];
IF
NOT case
THEN {
IF c1 <= 'Z AND c1 >= 'A THEN c1 ¬ c1 + ('a-'A);
IF c2 <= 'Z AND c2 >= 'A THEN c2 ¬ c2 + ('a-'A);
};
IF c1 # c2 THEN RETURN [FALSE];
ENDLOOP;
RETURN [submatch [0, len1, 0, len2]];
};
SkipTo:
PUBLIC
PROC [s:
ROPE, pos:
INT, skip:
ROPE]
RETURNS [
INT] =
TRUSTED {
... returns the lowest position N in s such that s[N] is in the skip string and N >= pos. If no such character occurs in s, then return s.length.
len: INT ¬ InlineSize[s];
skipText: Rope.Text = Flatten[skip];
skiplen: NAT ¬ IF skipText = NIL THEN 0 ELSE skipText.length;
IF pos < len
AND skiplen # 0
THEN {
CharMatch:
PROC [c:
CHAR]
RETURNS [
BOOL] =
TRUSTED {
FOR i:
NAT
IN [0..skiplen)
DO
IF c = skipText[i] THEN RETURN [TRUE];
ENDLOOP;
pos ¬ pos + 1;
RETURN [FALSE];
};
IF Map[s, pos, len - pos, CharMatch] THEN RETURN [pos];
};
RETURN [len];
};
SkipOver:
PUBLIC
PROC [s:
ROPE, pos:
INT, skip:
ROPE]
RETURNS [
INT] =
TRUSTED {
... return the lowest position N in s such that s[N] is NOT in the skip string and N >= pos. If no such character occurs in s, then return s.length.
len: INT ¬ InlineSize[s];
skipText: Rope.Text = Flatten[skip];
skiplen: NAT ¬ IF skipText = NIL THEN 0 ELSE skipText.length;
IF pos >= len THEN RETURN [len];
IF skiplen # 0
THEN {
CharMatch:
PROC [c:
CHAR]
RETURNS [
BOOL] =
TRUSTED {
FOR i:
NAT
IN [0..skiplen)
DO
IF c = skipText[i] THEN GO TO found;
ENDLOOP;
RETURN [TRUE];
EXITS found => {pos ¬ pos + 1; RETURN [FALSE]};
};
IF Map[s, pos, len - pos, CharMatch] THEN RETURN [pos];
};
RETURN [pos];
};
VerifyStructure:
PUBLIC
PROC
[s:
ROPE]
RETURNS [leaves,nodes,maxDepth:
INT ¬ 0] =
TRUSTED {
... traverses the structure of the given rope object; extra checking is performed to verify invariants.
IF s = NIL THEN RETURN;
WITH x: s
SELECT
FROM
text => {
leaves ¬ 1;
IF x.length > x.max THEN ERROR VerifyFailed};
node =>
WITH x: x
SELECT
FROM
substr => {
ref: ROPE ¬ x.base;
len1: INT ¬ Size[x.base];
IF x.start < 0 OR x.size <= 0 THEN ERROR VerifyFailed;
IF len1 < x.start + x.size THEN ERROR VerifyFailed;
[leaves, nodes, maxDepth] ¬ VerifyStructure[ref];
nodes ¬ nodes + 1};
concat => {
leaves1,nodes1,maxDepth1: INT;
left: ROPE ¬ x.base;
lSize: INT ¬ Size[left];
right: ROPE ¬ x.rest;
rSize: INT ¬ Size[right];
[leaves1, nodes1, maxDepth1] ¬ VerifyStructure[left];
[leaves, nodes, maxDepth] ¬ VerifyStructure[right];
leaves ¬ leaves + leaves1;
nodes ¬ nodes + nodes1 + 1;
IF maxDepth1 > maxDepth THEN maxDepth ¬ maxDepth1;
IF x.size # lSize + rSize THEN ERROR VerifyFailed;
IF x.pos # lSize THEN ERROR VerifyFailed;
};
replace => {
leaves1,nodes1,maxDepth1: INT;
old: ROPE ¬ x.base;
oldSize: INT ¬ Size[old];
repl: ROPE ¬ x.replace;
replSize: INT ¬ Size[repl];
[leaves, nodes, maxDepth] ¬ VerifyStructure[old];
[leaves1, nodes1, maxDepth1] ¬ VerifyStructure[repl];
leaves ¬ leaves + leaves1;
nodes ¬ nodes + nodes1 + 1;
IF maxDepth < maxDepth1 THEN maxDepth ¬ maxDepth1;
IF x.start > x.oldPos OR x.start > x.newPos THEN ERROR VerifyFailed;
IF x.newPos - x.start # replSize THEN ERROR VerifyFailed;
IF x.start < 0 OR x.start > x.size THEN ERROR VerifyFailed;
IF x.oldPos > oldSize THEN ERROR VerifyFailed;
};
object => {
leaves ¬ 1;
IF x.size < 0 OR x.fetch = NIL THEN ERROR VerifyFailed;
};
ENDCASE => ERROR NoRope;
ENDCASE => ERROR NoRope;
maxDepth ¬ maxDepth + 1;
IF maxDepth # InlineDepth[s] THEN ERROR VerifyFailed;
};
VerifyFailed:
PUBLIC
ERROR =
CODE;
Balance implementation
This implementation is courtesy of Michael Plass (March 1985, revised September 1993).
Balance:
PUBLIC
PROC [base:
ROPE, start:
INT ¬ 0, len:
INT ¬ MaxLen, flat:
INT ¬ FlatMax]
RETURNS [new:
ROPE ¬
NIL] = {
This procedure is here to match the new implementation against the old definition.
RETURN [NewBalance[Substr[base, start, len]]];
};
Stopper:
TYPE =
PROC [part: Part]
RETURNS [
BOOL];
Part:
TYPE ~
RECORD [base:
ROPE, start:
INT, end:
INT];
Must have 0 <= start <= end <= Size[base];
HeightArray: TYPE = ARRAY [0..MaxDepth) OF INT;
minSizeForHeight: REF HeightArray ¬ NIL;
InitMinSizeForHeight:
PROC ~ {
Initializes the height array. Does not need monitoring, since the array is completely initialized by the time the assignment occurs, and REF assignment is atomic. Races can only cause a little extra allocation.
IF minSizeForHeight =
NIL
THEN {
h: REF HeightArray ¬ untracedZone.NEW[HeightArray];
h[0] ¬ 0;
A NIL rope has no characters and height 0.
h[1] ¬ 1;
A flat rope ought to have at least one character.
h[2] ¬ Rope.FlatMax+1;
Must be at least this big to warrant any non-flat structure.
FOR i:
NAT
IN [3..MaxDepth)
DO
Use Fibonacci recurrence to compute rest.
Be careful about overflow here...
IF
INT.
LAST - h[i-1] < h[i-2]
THEN h[i] ¬ INT.LAST
ELSE h[i] ¬ h[i-1] + h[i-2];
ENDLOOP;
minSizeForHeight ¬ h;
};
};
PartIsBalanced: Stopper ~ {
Examines only the root.
size: INT ~ InlineSize[part.base];
height: INT ~ InlineDepth[part.base];
IF part.start # 0
OR part.end # size
OR height >= MaxDepth
THEN RETURN [FALSE];
IF minSizeForHeight = NIL THEN InitMinSizeForHeight[];
IF size < minSizeForHeight[height] THEN RETURN [FALSE];
WITH part.base
SELECT
FROM
substr: REF Rope.RopeRep.node.substr => RETURN [height<=1];
concat: REF Rope.RopeRep.node.concat => RETURN [TRUE];
replace: REF Rope.RopeRep.node.replace => RETURN [FALSE];
ENDCASE => RETURN [TRUE];
};
NewBalance:
PROC [rope:
ROPE]
RETURNS [
ROPE] ~ {
a: ARRAY [0..MaxDepth+6] OF ROPE ¬ ALL[NIL];
aN: INT ¬ 0;
The ordered concatenation of the a[i] is the result-so-far.
Assume and maintain this invariant: depth[a[i-1]] > depth[a[i]], for i IN (0..aN)
Collapse:
PROC [d:
INTEGER] ~ {
Makes depth[a[aN-1]] >= d, while maintaining the invariant.
UNTIL aN < 2
OR InlineDepth[a[aN-1]] >= d
DO
r: ROPE = SafeConcat[a[aN-2], a[aN-1]];
a[aN-1] ¬ NIL;
a[aN-2] ¬ NIL;
aN ¬ aN - 2;
Collapse[InlineDepth[r]];
Build[r];
ENDLOOP;
};
Build:
PROC [r:
ROPE] ~ {
Requires the invariant, plus (aN = 0) OR (depth[a[aN-1]] >= depth[r])
Appends r to the current collection of ropes.
UNTIL aN = 0
OR InlineDepth[a[aN-1]] > InlineDepth[r]
DO
r ¬ SafeConcat[a[aN - 1], r];
aN ¬ aN - 1;
ENDLOOP;
a[aN] ¬ r;
aN ¬ aN + 1;
};
SavePart:
PROC [part: Part] ~ {
IF part.end > part.start
THEN {
Since this part is balanced (it has passed PartIsBalanced), we know the depth is small enough so that the Substr call will not trigger a recursive call to NewBalance
r: ROPE ¬ Substr[part.base, part.start, part.end-part.start];
d: INTEGER ¬ InlineDepth[r];
Collapse[d];
UNTIL aN = 0
OR InlineDepth[a[aN-1]] > d
DO
r ¬ SafeConcat[a[aN - 1], r];
d ¬ InlineDepth[r];
aN ¬ aN - 1;
ENDLOOP;
a[aN] ¬ r;
aN ¬ aN + 1;
};
};
part: Part ~ [rope, 0, Size[rope]];
stopper: Stopper ~
IF InlineDepth[rope] > MaxDepth+1
THEN
NIL
ELSE PartIsBalanced;
Note: If the depth exceeds MaxDepth+1, that means we have a rope that NewBalance was unable to iron out sufficiently using PartIsBalanced as the stopper. In this very unusual case, we will map all the way down to the flat pieces to guarantee a balanced result.
MapParts[part, SavePart, stopper];
WHILE aN > 1
DO
a[aN-2] ¬ SafeConcat[a[aN-2], a[aN-1]];
a[aN-1] ¬ NIL;
aN ¬ aN - 1;
ENDLOOP;
RETURN [a[0]]
};
BadPart: SIGNAL ~ CODE;
MapParts:
PROC [part: Part, action:
PROC[Part], stopDescent: Stopper ¬
NIL] ~{
IF stopDescent#
NIL
AND stopDescent[part]
THEN action[part]
ELSE {
size: INT ~ InlineSize[part.base];
depth: INT ~ InlineDepth[part.base]; -- debugging information
IF part.start < 0 OR part.end NOT IN [part.start..size] THEN ERROR BadPart;
WITH part.base
SELECT
FROM
substr:
REF Rope.RopeRep.node.substr => {
MapParts[[substr.base, substr.start+part.start, substr.start+part.end], action, stopDescent];
};
concat:
REF Rope.RopeRep.node.concat => {
IF part.start < concat.pos
THEN {
MapParts[[concat.base, part.start, MIN[part.end, concat.pos]], action, stopDescent];
};
IF part.end > concat.pos
THEN {
newStart: INT ¬ MAX[part.start-concat.pos, 0];
newEnd: INT ¬ part.end-concat.pos;
MapParts[[concat.rest, newStart, newEnd], action, stopDescent];
};
};
replace:
REF Rope.RopeRep.node.replace => {
len1: INT ~ replace.start;
len2: INT ~ replace.newPos-replace.start;
len3: INT ~ replace.size-replace.newPos;
offset3: INT ~ replace.oldPos;
IF part.start < len1
THEN {
MapParts[[replace.base, part.start, MIN[part.end, len1]], action, stopDescent];
};
IF part.start < len1+len2
AND part.end > len1
THEN {
newStart: INT ~ MAX[part.start-len1, 0];
newEnd: INT ~ MIN[part.end-len1, len2];
MapParts[[replace.replace, newStart, newEnd], action, stopDescent];
};
IF part.end > len1+len2
THEN {
newStart: INT ¬ MAX[part.start-(len1+len2), 0]+offset3;
newEnd: INT ¬ MIN[part.end-(len1+len2), len3]+offset3;
MapParts[[replace.base, newStart, newEnd], action, stopDescent];
};
};
ENDCASE => action[part];
};
};