-- RopeImplExt.mesa, "Thick" string implementation extension
-- Russ Atkinson, June 7, 1982 4:04 pm

-- This implementation supports "lazy evaluation" for Substr, Concat,
-- and Replace operations.  It also allows the user to create arbitrary
-- implementations for compatible Rope objects by supplying procedures
-- for the defining operations.

DIRECTORY
  Rope USING [ContainingPiece, Map, NoRope, ROPE, Size, Text],
  RopeInline;

RopeImplExt: CEDAR PROGRAM
     IMPORTS Rope, RopeInline
     EXPORTS Rope
     SHARES  Rope
     = BEGIN OPEN Rope, RopeInline;

  -- The remaining operations provide for Rope scanning and matching
  Run: PUBLIC PROC [s1: ROPE, pos1: INT, s2: ROPE, pos2: INT,
            case: BOOL ← TRUE] RETURNS [result: INT] = TRUSTED {
     -- Returns the largest number of characters N such that
     -- s1 starting at pos1 is equal to s2 starting at pos2
     -- for N characters.  More formally:
     -- FOR i IN [0..N): s1[pos1+i] = s2[pos2+i]
     -- If case is true, then case matters, otherwise
     -- upper and lower case characters are considered equal. 
    len1,len2: INT; 
    str1, str2: Text;
    [len1, str1] ← SingleSize[s1];
    [len2, str2] ← SingleSize[s2];
    result ← 0;
    IF NonNeg[pos1] >= len1 OR NonNeg[pos2] >= len2 THEN RETURN;
    {r1,r2: ROPE ← NIL;
     st1,sz1,lm1: INT ← 0;
     st2,sz2,lm2: INT ← 0;
     c1,c2: CHAR;
     DO
        IF st1 = lm1 THEN {
	   -- need a new piece from s1
	   [r1, st1, sz1] ← ContainingPiece[s1, pos1 ← pos1 + sz1];
	   IF sz1 = 0 THEN RETURN;
	   lm1 ← st1 + sz1};
	IF st2 = lm2 THEN {
	   -- need a new piece from s2
	   [r2, st2, sz2] ← ContainingPiece[s2, pos2 ← pos2 + sz2];
	   IF sz2 = 0 THEN RETURN;
	   lm2 ← st2 + sz2};
	c1 ← InlineFetch[r1, st1];
	c2 ← InlineFetch[r2, st2];
	IF case
	   THEN {IF c1 # c2 THEN RETURN}
	   ELSE {IF Lower[c1] # Lower[c2] THEN RETURN};
	result ← result + 1;
	st1 ← st1 + 1; st2 ← st2 + 1;
        ENDLOOP;
     }};

  Find: PUBLIC PROC
      [s1, s2: ROPE, pos1: INT ← 0, case: BOOL ← TRUE]
      RETURNS [INT] = TRUSTED {
    index: INT ← Index[s1, pos1, s2, case];
    IF index = RopeInline.InlineSize[s1] THEN RETURN [-1];
    RETURN [index];
    };

  Index: PUBLIC PROC
     [s1: ROPE, pos1: INT, s2: ROPE, case: BOOL ← TRUE]
     RETURNS [INT] = TRUSTED {
     -- Returns the smallest character position N such that
     -- s2 occurs in s1 at N and N >= pos1.  If s2 does not
     -- occur in s1 at or after pos1, s1.length is returned.
     -- pos1 <= N < s1.length =>
        --  FOR i IN [0..s2.length): s1[N+i] = s2[i]
     -- N = s1.length => s2 does not occur in s1
     -- If case is true, then case matters, otherwise
     -- upper and lower case characters are considered equal.
    len1,len2, rem: INT; both: BOOL;
    [len1,len2,both] ← DoubleSize[s1, s2];
    rem ← IF pos1 >= len1 THEN 0 ELSE len1 - NonNeg[pos1];
    IF rem < len2 THEN RETURN [len1];
    IF len2 = 0 THEN RETURN [pos1];
    {c: CHAR ← InlineFetch[s2, 0];
     Cmp: PROC [cc: CHAR] RETURNS [BOOL] = TRUSTED {
       IF c = cc AND Run[s1, pos1+1, s2, 1, case]+1 = len2
          THEN RETURN [TRUE];
       pos1 ← pos1 + 1; RETURN [FALSE];
       };
     LCmp: PROC [cc: CHAR] RETURNS [BOOL] = TRUSTED {
       IF c = Lower[cc] AND Run[s1, pos1+1, s2, 1, case]+1 = len2
          THEN RETURN [TRUE];
       pos1 ← pos1 + 1; RETURN [FALSE];
       };
     rem ← rem - len2 + 1;
     IF case
        THEN {IF Map[s1, pos1, rem, Cmp] THEN RETURN [pos1]}
        ELSE {c ← Lower[c]; IF Map[s1, pos1, rem, LCmp] THEN RETURN [pos1]};
     };
    RETURN [len1];
    }; 

  Match: PUBLIC PROC [pattern, object: ROPE, case: BOOL ← TRUE]
     RETURNS [BOOL] = TRUSTED {
     -- Returns TRUE if the object matches the pattern,
     -- where the pattern may contain * to indicate that
     -- 0 or more characters will match.  Returns FALSE
     -- otherwise.  For example, using a*b as a pattern:
     -- some matching objects are: ab, a#b, a###b
     -- some not matching objects: abc, cde, bb, a, Ab
     -- If case is true, then case matters, otherwise
     -- upper and lower case characters are considered equal.
    submatch: PROC [i1: INT, len1: INT, i2: INT, len2: INT]
                   RETURNS [BOOL] = TRUSTED {
      WHILE len1 > 0 DO
        c1: CHAR ← RopeInline.InlineFetch[pattern, i1];
        IF c1 = '* THEN
           {-- quick kill for * at end of pattern
            IF len1 = 1 THEN RETURN [TRUE];
            -- else must take all combinations
            {-- first, accept the *
       	     j1: INT ← i1 + 1;
             nlen1: INT ← len1 - 1;
             j2: INT ← i2;
             nlen2: INT ← len2;
             WHILE nlen2 >= 0 DO
               IF submatch[j1, nlen1, j2, nlen2] THEN RETURN [TRUE];
               j2 ← j2 + 1;
               nlen2 ← nlen2 - 1;
               ENDLOOP;
	     };
           RETURN [FALSE];
           };
        IF len2 <= 0 THEN RETURN [FALSE];
        -- at this point demand an exact match in both strings
        {c2: CHAR ← RopeInline.InlineFetch[object, i2];
         IF c1 # c2 THEN
            IF case OR (Lower[c1] # Lower[c2]) THEN RETURN [FALSE];
         };
        i1 ← i1 + 1;
        len1 ← len1 - 1;
        i2 ← i2 + 1;
        len2 ← len2 - 1;
        ENDLOOP;
      RETURN [len2 = 0];
      };
    len1: INT ← RopeInline.InlineSize[pattern];
    len2: INT ← RopeInline.InlineSize[object];
    RETURN [submatch [0, len1, 0, len2]];
    }; 

  SkipTo: PUBLIC PROC [s: ROPE, pos: INT, skip: ROPE] RETURNS [INT] = TRUSTED {
    -- return the lowest position N in s such that s[N] is in
    -- the skip string and N >= pos.  If no such character occurs
    -- in s, then return s.length.
    len: INT ← InlineSize[s];
    skiplen: INT ← InlineSize[skip];
    CharMatch: PROC [c: CHAR] RETURNS [BOOL] = TRUSTED {
       SubMatch: PROC [cc: CHAR] RETURNS [BOOL] = TRUSTED {RETURN [c = cc]};
       IF Map[skip, 0, skiplen, SubMatch] THEN RETURN [TRUE];
       pos ← pos + 1; RETURN [FALSE]};
    IF pos < len AND Map[s, pos, len - pos, CharMatch] THEN RETURN [pos];
    RETURN [len];  
    };

  SkipOver: PUBLIC PROC [s: ROPE, pos: INT, skip: ROPE] RETURNS [INT] = TRUSTED {
    -- return the lowest position N in s such that s[N] is NOT in
    -- the skip string and N >= pos.  If no such character occurs
    -- in s, then return s.length.
    len: INT ← InlineSize[s];
    skiplen: INT ← InlineSize[skip];
    CharMatch: PROC [c: CHAR] RETURNS [BOOL] = TRUSTED {
       SubMatch: PROC [cc: CHAR] RETURNS [BOOL] = TRUSTED {RETURN [c = cc]};
       IF NOT Map[skip, 0, skiplen, SubMatch]
          THEN RETURN [TRUE];
       pos ← pos + 1; RETURN [FALSE]};
    IF pos < len AND Map[s, pos, len - pos, CharMatch] THEN RETURN [pos];
    RETURN [len];  
    };
    
  VerifyStructure: PUBLIC PROC
     [s: ROPE] RETURNS [leaves,nodes,maxDepth: INT] = TRUSTED {
     -- traverse the structure of the given rope object
     -- extra checking is performed to verify invariants
    leaves ← 0;
    maxDepth ← 0;
    nodes ← 0;
    IF s = NIL THEN RETURN;
    WITH x: s SELECT FROM
       text =>
        {leaves ← 1;
         IF x.length > x.max THEN ERROR VerifyFailed};
       node =>
	 WITH x: x SELECT FROM
           substr => {
	     ref: ROPE ← x.base;
	     len1: INT ← Size[x.base];
	     IF x.start < 0 OR x.size <= 0 THEN ERROR VerifyFailed;
	     IF len1 < x.start + x.size THEN ERROR VerifyFailed;
	     [leaves, nodes, maxDepth] ← VerifyStructure[ref];
	     nodes ← nodes + 1};
           concat => {
	     leaves1,nodes1,maxDepth1: INT;
	     left: ROPE ← x.base;
	     lSize: INT ← Size[left];
	     right: ROPE ← x.rest;
	     rSize: INT ← Size[right];
	     [leaves1, nodes1, maxDepth1] ← VerifyStructure[left];
	     [leaves, nodes, maxDepth] ← VerifyStructure[right];
	     leaves ← leaves + leaves1;
	     nodes ← nodes + nodes1 + 1;
	     IF maxDepth1 > maxDepth THEN maxDepth ← maxDepth1;
	     IF x.size # lSize + rSize THEN ERROR VerifyFailed;
	     IF x.pos # lSize THEN ERROR VerifyFailed;
	     };
           replace => {
	     leaves1,nodes1,maxDepth1: INT;
	     old: ROPE ← x.base;
	     oldSize: INT ← Size[old];
	     repl: ROPE ← x.replace;
	     replSize: INT ← Size[repl];
	     [leaves, nodes, maxDepth] ← VerifyStructure[old];
	     [leaves1, nodes1, maxDepth1] ← VerifyStructure[repl];
	     leaves ← leaves + leaves1;
	     nodes ← nodes + nodes1 + 1;
	     IF maxDepth < maxDepth1 THEN maxDepth ← maxDepth1;
	     IF x.start > x.oldPos OR x.start > x.newPos THEN ERROR VerifyFailed;
	     IF x.newPos - x.start # replSize THEN ERROR VerifyFailed;
	     IF x.start < 0 OR x.start > x.size THEN ERROR VerifyFailed;
	     IF x.oldPos > oldSize THEN ERROR VerifyFailed;
	     };
           object => {
	     leaves ← 1; IF x.size < 0 OR x.fetch = NIL THEN ERROR VerifyFailed;
	     };
           ENDCASE => ERROR NoRope;
       ENDCASE => ERROR NoRope;
    maxDepth ← maxDepth + 1;
    IF maxDepth # InlineDepth[s] THEN ERROR VerifyFailed;
    };

  VerifyFailed: PUBLIC ERROR = CODE;

  END.