RopeImplExt.mesa, "Thick" string implementation extension
Russ Atkinson, June 7, 1982 4:04 pm
Paul Rovner, August 8, 1983 11:39 am
This implementation supports "lazy evaluation" for Substr, Concat,
and Replace operations. It also allows the user to create arbitrary
implementations for compatible Rope objects by supplying procedures
for the defining operations.
DIRECTORY
Ascii USING [Lower],
Rope USING [InlineFetch, InlineSize, ContainingPiece, Map, NoRope, ROPE, Size, Text],
RopePrivate USING [InlineDepth, SingleSize, NonNeg, DoubleSize];
RopeImplExt: CEDAR PROGRAM
IMPORTS Ascii, Rope, RopePrivate
EXPORTS Rope
SHARES Rope
= BEGIN OPEN Rope, RopePrivate;
The remaining operations provide for Rope scanning and matching
Run: PUBLIC PROC [s1: ROPE, pos1: INT, s2: ROPE, pos2: INT,
case: BOOLTRUE] RETURNS [result: INT] = TRUSTED {
Returns the largest number of characters N such that
s1 starting at pos1 is equal to s2 starting at pos2
for N characters. More formally:
FOR i IN [0..N): s1[pos1+i] = s2[pos2+i]
If case is true, then case matters, otherwise
upper and lower case characters are considered equal.
len1,len2: INT;
str1, str2: Text;
[len1, str1] ← SingleSize[s1];
[len2, str2] ← SingleSize[s2];
result ← 0;
IF NonNeg[pos1] >= len1 OR NonNeg[pos2] >= len2 THEN RETURN;
{r1,r2: ROPENIL;
st1,sz1,lm1: INT ← 0;
st2,sz2,lm2: INT ← 0;
c1,c2: CHAR;
DO
IF st1 = lm1 THEN {
need a new piece from s1
[r1, st1, sz1] ← ContainingPiece[s1, pos1 ← pos1 + sz1];
IF sz1 = 0 THEN RETURN;
lm1 ← st1 + sz1};
IF st2 = lm2 THEN {
need a new piece from s2
[r2, st2, sz2] ← ContainingPiece[s2, pos2 ← pos2 + sz2];
IF sz2 = 0 THEN RETURN;
lm2 ← st2 + sz2};
c1 ← InlineFetch[r1, st1];
c2 ← InlineFetch[r2, st2];
IF case
THEN {IF c1 # c2 THEN RETURN}
ELSE {IF Ascii.Lower[c1] # Ascii.Lower[c2] THEN RETURN};
result ← result + 1;
st1 ← st1 + 1; st2 ← st2 + 1;
ENDLOOP;
}};
Find: PUBLIC PROC
[s1, s2: ROPE, pos1: INT ← 0, case: BOOLTRUE]
RETURNS [INT] = TRUSTED {
index: INT ← Index[s1, pos1, s2, case];
IF index = InlineSize[s1] THEN RETURN [-1];
RETURN [index];
};
Index: PUBLIC PROC
[s1: ROPE, pos1: INT, s2: ROPE, case: BOOLTRUE]
RETURNS [INT] = TRUSTED {
Returns the smallest character position N such that
s2 occurs in s1 at N and N >= pos1. If s2 does not
occur in s1 at or after pos1, s1.length is returned.
pos1 <= N < s1.length =>
FOR i IN [0..s2.length): s1[N+i] = s2[i]
N = s1.length => s2 does not occur in s1
If case is true, then case matters, otherwise
upper and lower case characters are considered equal.
len1,len2, rem: INT; both: BOOL;
[len1,len2,both] ← DoubleSize[s1, s2];
rem ← IF pos1 >= len1 THEN 0 ELSE len1 - NonNeg[pos1];
IF rem < len2 THEN RETURN [len1];
IF len2 = 0 THEN RETURN [pos1];
{c: CHAR ← InlineFetch[s2, 0];
Cmp: PROC [cc: CHAR] RETURNS [BOOL] = TRUSTED {
IF c = cc AND Run[s1, pos1+1, s2, 1, case]+1 = len2
THEN RETURN [TRUE];
pos1 ← pos1 + 1; RETURN [FALSE];
};
LCmp: PROC [cc: CHAR] RETURNS [BOOL] = TRUSTED {
IF c = Ascii.Lower[cc] AND Run[s1, pos1+1, s2, 1, case]+1 = len2
THEN RETURN [TRUE];
pos1 ← pos1 + 1; RETURN [FALSE];
};
rem ← rem - len2 + 1;
IF case
THEN {IF Map[s1, pos1, rem, Cmp] THEN RETURN [pos1]}
ELSE {c ← Ascii.Lower[c]; IF Map[s1, pos1, rem, LCmp] THEN RETURN [pos1]};
};
RETURN [len1];
};
Match: PUBLIC PROC [pattern, object: ROPE, case: BOOLTRUE]
RETURNS [BOOL] = TRUSTED {
Returns TRUE if the object matches the pattern,
where the pattern may contain * to indicate that
0 or more characters will match. Returns FALSE
otherwise. For example, using a*b as a pattern:
some matching objects are: ab, a#b, a###b
some not matching objects: abc, cde, bb, a, Ab
If case is true, then case matters, otherwise
upper and lower case characters are considered equal.
submatch: PROC [i1: INT, len1: INT, i2: INT, len2: INT]
RETURNS [BOOL] = TRUSTED {
WHILE len1 > 0 DO
c1: CHAR ← InlineFetch[pattern, i1];
IF c1 = '* THEN
{-- quick kill for * at end of pattern
IF len1 = 1 THEN RETURN [TRUE];
else must take all combinations
{-- first, accept the *
j1: INT ← i1 + 1;
nlen1: INT ← len1 - 1;
j2: INT ← i2;
nlen2: INT ← len2;
WHILE nlen2 >= 0 DO
IF submatch[j1, nlen1, j2, nlen2] THEN RETURN [TRUE];
j2 ← j2 + 1;
nlen2 ← nlen2 - 1;
ENDLOOP;
};
RETURN [FALSE];
};
IF len2 <= 0 THEN RETURN [FALSE];
at this point demand an exact match in both strings
{c2: CHAR ← InlineFetch[object, i2];
IF c1 # c2 THEN
IF case OR (Ascii.Lower[c1] # Ascii.Lower[c2]) THEN RETURN [FALSE];
};
i1 ← i1 + 1;
len1 ← len1 - 1;
i2 ← i2 + 1;
len2 ← len2 - 1;
ENDLOOP;
RETURN [len2 = 0];
};
len1: INT ← InlineSize[pattern];
len2: INT ← InlineSize[object];
RETURN [submatch [0, len1, 0, len2]];
};
SkipTo: PUBLIC PROC [s: ROPE, pos: INT, skip: ROPE] RETURNS [INT] = TRUSTED {
return the lowest position N in s such that s[N] is in
the skip string and N >= pos. If no such character occurs
in s, then return s.length.
len: INT ← InlineSize[s];
skiplen: INT ← InlineSize[skip];
CharMatch: PROC [c: CHAR] RETURNS [BOOL] = TRUSTED {
SubMatch: PROC [cc: CHAR] RETURNS [BOOL] = TRUSTED {RETURN [c = cc]};
IF Map[skip, 0, skiplen, SubMatch] THEN RETURN [TRUE];
pos ← pos + 1; RETURN [FALSE]};
IF pos < len AND Map[s, pos, len - pos, CharMatch] THEN RETURN [pos];
RETURN [len];
};
SkipOver: PUBLIC PROC [s: ROPE, pos: INT, skip: ROPE] RETURNS [INT] = TRUSTED {
return the lowest position N in s such that s[N] is NOT in
the skip string and N >= pos. If no such character occurs
in s, then return s.length.
len: INT ← InlineSize[s];
skiplen: INT ← InlineSize[skip];
CharMatch: PROC [c: CHAR] RETURNS [BOOL] = TRUSTED {
SubMatch: PROC [cc: CHAR] RETURNS [BOOL] = TRUSTED {RETURN [c = cc]};
IF NOT Map[skip, 0, skiplen, SubMatch]
THEN RETURN [TRUE];
pos ← pos + 1; RETURN [FALSE]};
IF pos < len AND Map[s, pos, len - pos, CharMatch] THEN RETURN [pos];
RETURN [len];
};
VerifyStructure: PUBLIC PROC
[s: ROPE] RETURNS [leaves,nodes,maxDepth: INT] = TRUSTED {
traverse the structure of the given rope object
extra checking is performed to verify invariants
leaves ← 0;
maxDepth ← 0;
nodes ← 0;
IF s = NIL THEN RETURN;
WITH x: s SELECT FROM
text =>
{leaves ← 1;
IF x.length > x.max THEN ERROR VerifyFailed};
node =>
WITH x: x SELECT FROM
substr => {
ref: ROPE ← x.base;
len1: INT ← Size[x.base];
IF x.start < 0 OR x.size <= 0 THEN ERROR VerifyFailed;
IF len1 < x.start + x.size THEN ERROR VerifyFailed;
[leaves, nodes, maxDepth] ← VerifyStructure[ref];
nodes ← nodes + 1};
concat => {
leaves1,nodes1,maxDepth1: INT;
left: ROPE ← x.base;
lSize: INT ← Size[left];
right: ROPE ← x.rest;
rSize: INT ← Size[right];
[leaves1, nodes1, maxDepth1] ← VerifyStructure[left];
[leaves, nodes, maxDepth] ← VerifyStructure[right];
leaves ← leaves + leaves1;
nodes ← nodes + nodes1 + 1;
IF maxDepth1 > maxDepth THEN maxDepth ← maxDepth1;
IF x.size # lSize + rSize THEN ERROR VerifyFailed;
IF x.pos # lSize THEN ERROR VerifyFailed;
};
replace => {
leaves1,nodes1,maxDepth1: INT;
old: ROPE ← x.base;
oldSize: INT ← Size[old];
repl: ROPE ← x.replace;
replSize: INT ← Size[repl];
[leaves, nodes, maxDepth] ← VerifyStructure[old];
[leaves1, nodes1, maxDepth1] ← VerifyStructure[repl];
leaves ← leaves + leaves1;
nodes ← nodes + nodes1 + 1;
IF maxDepth < maxDepth1 THEN maxDepth ← maxDepth1;
IF x.start > x.oldPos OR x.start > x.newPos THEN ERROR VerifyFailed;
IF x.newPos - x.start # replSize THEN ERROR VerifyFailed;
IF x.start < 0 OR x.start > x.size THEN ERROR VerifyFailed;
IF x.oldPos > oldSize THEN ERROR VerifyFailed;
};
object => {
leaves ← 1; IF x.size < 0 OR x.fetch = NIL THEN ERROR VerifyFailed;
};
ENDCASE => ERROR NoRope;
ENDCASE => ERROR NoRope;
maxDepth ← maxDepth + 1;
IF maxDepth # InlineDepth[s] THEN ERROR VerifyFailed;
};
VerifyFailed: PUBLIC ERROR = CODE;
END.