-- RopeImplExt.mesa, "Thick" string implementation extension -- Russ Atkinson, June 7, 1982 4:04 pm -- This implementation supports "lazy evaluation" for Substr, Concat, -- and Replace operations. It also allows the user to create arbitrary -- implementations for compatible Rope objects by supplying procedures -- for the defining operations. DIRECTORY Rope USING [ContainingPiece, Map, NoRope, ROPE, Size, Text], RopeInline; RopeImplExt: CEDAR PROGRAM IMPORTS Rope, RopeInline EXPORTS Rope SHARES Rope = BEGIN OPEN Rope, RopeInline; -- The remaining operations provide for Rope scanning and matching Run: PUBLIC PROC [s1: ROPE, pos1: INT, s2: ROPE, pos2: INT, case: BOOL ← TRUE] RETURNS [result: INT] = TRUSTED { -- Returns the largest number of characters N such that -- s1 starting at pos1 is equal to s2 starting at pos2 -- for N characters. More formally: -- FOR i IN [0..N): s1[pos1+i] = s2[pos2+i] -- If case is true, then case matters, otherwise -- upper and lower case characters are considered equal. len1,len2: INT; str1, str2: Text; [len1, str1] ← SingleSize[s1]; [len2, str2] ← SingleSize[s2]; result ← 0; IF NonNeg[pos1] >= len1 OR NonNeg[pos2] >= len2 THEN RETURN; {r1,r2: ROPE ← NIL; st1,sz1,lm1: INT ← 0; st2,sz2,lm2: INT ← 0; c1,c2: CHAR; DO IF st1 = lm1 THEN { -- need a new piece from s1 [r1, st1, sz1] ← ContainingPiece[s1, pos1 ← pos1 + sz1]; IF sz1 = 0 THEN RETURN; lm1 ← st1 + sz1}; IF st2 = lm2 THEN { -- need a new piece from s2 [r2, st2, sz2] ← ContainingPiece[s2, pos2 ← pos2 + sz2]; IF sz2 = 0 THEN RETURN; lm2 ← st2 + sz2}; c1 ← InlineFetch[r1, st1]; c2 ← InlineFetch[r2, st2]; IF case THEN {IF c1 # c2 THEN RETURN} ELSE {IF Lower[c1] # Lower[c2] THEN RETURN}; result ← result + 1; st1 ← st1 + 1; st2 ← st2 + 1; ENDLOOP; }}; Find: PUBLIC PROC [s1, s2: ROPE, pos1: INT ← 0, case: BOOL ← TRUE] RETURNS [INT] = TRUSTED { index: INT ← Index[s1, pos1, s2, case]; IF index = RopeInline.InlineSize[s1] THEN RETURN [-1]; RETURN [index]; }; Index: PUBLIC PROC [s1: ROPE, pos1: INT, s2: ROPE, case: BOOL ← TRUE] RETURNS [INT] = TRUSTED { -- Returns the smallest character position N such that -- s2 occurs in s1 at N and N >= pos1. If s2 does not -- occur in s1 at or after pos1, s1.length is returned. -- pos1 <= N < s1.length => -- FOR i IN [0..s2.length): s1[N+i] = s2[i] -- N = s1.length => s2 does not occur in s1 -- If case is true, then case matters, otherwise -- upper and lower case characters are considered equal. len1,len2, rem: INT; both: BOOL; [len1,len2,both] ← DoubleSize[s1, s2]; rem ← IF pos1 >= len1 THEN 0 ELSE len1 - NonNeg[pos1]; IF rem < len2 THEN RETURN [len1]; IF len2 = 0 THEN RETURN [pos1]; {c: CHAR ← InlineFetch[s2, 0]; Cmp: PROC [cc: CHAR] RETURNS [BOOL] = TRUSTED { IF c = cc AND Run[s1, pos1+1, s2, 1, case]+1 = len2 THEN RETURN [TRUE]; pos1 ← pos1 + 1; RETURN [FALSE]; }; LCmp: PROC [cc: CHAR] RETURNS [BOOL] = TRUSTED { IF c = Lower[cc] AND Run[s1, pos1+1, s2, 1, case]+1 = len2 THEN RETURN [TRUE]; pos1 ← pos1 + 1; RETURN [FALSE]; }; rem ← rem - len2 + 1; IF case THEN {IF Map[s1, pos1, rem, Cmp] THEN RETURN [pos1]} ELSE {c ← Lower[c]; IF Map[s1, pos1, rem, LCmp] THEN RETURN [pos1]}; }; RETURN [len1]; }; Match: PUBLIC PROC [pattern, object: ROPE, case: BOOL ← TRUE] RETURNS [BOOL] = TRUSTED { -- Returns TRUE if the object matches the pattern, -- where the pattern may contain * to indicate that -- 0 or more characters will match. Returns FALSE -- otherwise. For example, using a*b as a pattern: -- some matching objects are: ab, a#b, a###b -- some not matching objects: abc, cde, bb, a, Ab -- If case is true, then case matters, otherwise -- upper and lower case characters are considered equal. submatch: PROC [i1: INT, len1: INT, i2: INT, len2: INT] RETURNS [BOOL] = TRUSTED { WHILE len1 > 0 DO c1: CHAR ← RopeInline.InlineFetch[pattern, i1]; IF c1 = '* THEN {-- quick kill for * at end of pattern IF len1 = 1 THEN RETURN [TRUE]; -- else must take all combinations {-- first, accept the * j1: INT ← i1 + 1; nlen1: INT ← len1 - 1; j2: INT ← i2; nlen2: INT ← len2; WHILE nlen2 >= 0 DO IF submatch[j1, nlen1, j2, nlen2] THEN RETURN [TRUE]; j2 ← j2 + 1; nlen2 ← nlen2 - 1; ENDLOOP; }; RETURN [FALSE]; }; IF len2 <= 0 THEN RETURN [FALSE]; -- at this point demand an exact match in both strings {c2: CHAR ← RopeInline.InlineFetch[object, i2]; IF c1 # c2 THEN IF case OR (Lower[c1] # Lower[c2]) THEN RETURN [FALSE]; }; i1 ← i1 + 1; len1 ← len1 - 1; i2 ← i2 + 1; len2 ← len2 - 1; ENDLOOP; RETURN [len2 = 0]; }; len1: INT ← RopeInline.InlineSize[pattern]; len2: INT ← RopeInline.InlineSize[object]; RETURN [submatch [0, len1, 0, len2]]; }; SkipTo: PUBLIC PROC [s: ROPE, pos: INT, skip: ROPE] RETURNS [INT] = TRUSTED { -- return the lowest position N in s such that s[N] is in -- the skip string and N >= pos. If no such character occurs -- in s, then return s.length. len: INT ← InlineSize[s]; skiplen: INT ← InlineSize[skip]; CharMatch: PROC [c: CHAR] RETURNS [BOOL] = TRUSTED { SubMatch: PROC [cc: CHAR] RETURNS [BOOL] = TRUSTED {RETURN [c = cc]}; IF Map[skip, 0, skiplen, SubMatch] THEN RETURN [TRUE]; pos ← pos + 1; RETURN [FALSE]}; IF pos < len AND Map[s, pos, len - pos, CharMatch] THEN RETURN [pos]; RETURN [len]; }; SkipOver: PUBLIC PROC [s: ROPE, pos: INT, skip: ROPE] RETURNS [INT] = TRUSTED { -- return the lowest position N in s such that s[N] is NOT in -- the skip string and N >= pos. If no such character occurs -- in s, then return s.length. len: INT ← InlineSize[s]; skiplen: INT ← InlineSize[skip]; CharMatch: PROC [c: CHAR] RETURNS [BOOL] = TRUSTED { SubMatch: PROC [cc: CHAR] RETURNS [BOOL] = TRUSTED {RETURN [c = cc]}; IF NOT Map[skip, 0, skiplen, SubMatch] THEN RETURN [TRUE]; pos ← pos + 1; RETURN [FALSE]}; IF pos < len AND Map[s, pos, len - pos, CharMatch] THEN RETURN [pos]; RETURN [len]; }; VerifyStructure: PUBLIC PROC [s: ROPE] RETURNS [leaves,nodes,maxDepth: INT] = TRUSTED { -- traverse the structure of the given rope object -- extra checking is performed to verify invariants leaves ← 0; maxDepth ← 0; nodes ← 0; IF s = NIL THEN RETURN; WITH x: s SELECT FROM text => {leaves ← 1; IF x.length > x.max THEN ERROR VerifyFailed}; node => WITH x: x SELECT FROM substr => { ref: ROPE ← x.base; len1: INT ← Size[x.base]; IF x.start < 0 OR x.size <= 0 THEN ERROR VerifyFailed; IF len1 < x.start + x.size THEN ERROR VerifyFailed; [leaves, nodes, maxDepth] ← VerifyStructure[ref]; nodes ← nodes + 1}; concat => { leaves1,nodes1,maxDepth1: INT; left: ROPE ← x.base; lSize: INT ← Size[left]; right: ROPE ← x.rest; rSize: INT ← Size[right]; [leaves1, nodes1, maxDepth1] ← VerifyStructure[left]; [leaves, nodes, maxDepth] ← VerifyStructure[right]; leaves ← leaves + leaves1; nodes ← nodes + nodes1 + 1; IF maxDepth1 > maxDepth THEN maxDepth ← maxDepth1; IF x.size # lSize + rSize THEN ERROR VerifyFailed; IF x.pos # lSize THEN ERROR VerifyFailed; }; replace => { leaves1,nodes1,maxDepth1: INT; old: ROPE ← x.base; oldSize: INT ← Size[old]; repl: ROPE ← x.replace; replSize: INT ← Size[repl]; [leaves, nodes, maxDepth] ← VerifyStructure[old]; [leaves1, nodes1, maxDepth1] ← VerifyStructure[repl]; leaves ← leaves + leaves1; nodes ← nodes + nodes1 + 1; IF maxDepth < maxDepth1 THEN maxDepth ← maxDepth1; IF x.start > x.oldPos OR x.start > x.newPos THEN ERROR VerifyFailed; IF x.newPos - x.start # replSize THEN ERROR VerifyFailed; IF x.start < 0 OR x.start > x.size THEN ERROR VerifyFailed; IF x.oldPos > oldSize THEN ERROR VerifyFailed; }; object => { leaves ← 1; IF x.size < 0 OR x.fetch = NIL THEN ERROR VerifyFailed; }; ENDCASE => ERROR NoRope; ENDCASE => ERROR NoRope; maxDepth ← maxDepth + 1; IF maxDepth # InlineDepth[s] THEN ERROR VerifyFailed; }; VerifyFailed: PUBLIC ERROR = CODE; END.