-- Rope.mesa, "Thick" string definitions
-- Russ Atkinson, August 27, 1982 11:51 am

-- Table of Contents:
-- *** Part 1: Basic operations and definitions
-- *** Part 2: Extended operations and definitions
-- *** Part 3: Miscellaneous definitions

-- A Rope is (nominally) an immutable object containg a sequence of characters indexed starting at 0 for Size characters. The representation allows certain operations to be performed without copying all of the characters at the expense of adding additional nodes to the object.

-- NOTE: the bit pattern of the text variant is GUARANTEED to be consistent with the built-in Cedar type TEXT, although the types will not agree either at compile-time or runtime. This means that one can use objects of type Text or TEXT interchangeably provided that the runtime type is not examined, and provided that the compiler can be fooled (via LOOPHOLE). Since REF TEXT is roughly compatible with Mesa LONG STRING, this is a means for passing Rope refs to Pilot routines that expect LONG STRING. For short STRING you are on your own (see ConvertUnsafe).

DIRECTORY
Environment USING [Comparison];

Rope: CEDAR DEFINITIONS

-- *** Part 1: Basic operations and definitions *** --

= BEGIN

ROPE: TYPE = REF RopeRep;

NoRope: ERROR;
-- signalled if rope is invalid variant
-- usually indicates severe illness in the world

-- Note: BoundsFault = Runtime.BoundsFault
-- It is raised by many of the Rope operations

Cat: PROC [r1, r2, r3, r4, r5, r6: ROPENIL] RETURNS [ROPE];
-- returns the concatenation of up to six ropes (limit based on eval stack depth)
-- BoundsFault occurs if the result gets too large

Concat: PROC [base,rest: ROPENIL] RETURNS [ROPE];
-- the two-rope (faster) version of Cat
-- BoundsFault occurs if the result gets too large

Compare: PROC
[s1, s2: ROPE, case: BOOLTRUE] RETURNS [Environment.Comparison];
-- based on CHAR collating sequence
-- case => case of characters is significant

Equal: PROC [s1, s2: ROPE, case: BOOLTRUE] RETURNS [BOOL];
-- contents equality of s1 and s2
-- case => case of characters is significant

Fetch: PROC [base: ROPE, index: INT ← 0] RETURNS [c: CHAR];
-- fetches indexed character from given ropes
-- BoundsFault occurs if index is >= the rope size

Find: PROC [s1, s2: ROPE, pos1: INT ← 0, case: BOOLTRUE] RETURNS [INT];
-- like Index, returns position in s1 where s2 occurs (starts looking at pos1)
-- returns -1 if not found
-- case => case of characters is significant

Index: PROC
[s1: ROPE, pos1: INT ← 0, s2: ROPE, case: BOOLTRUE] RETURNS [INT];
-- Returns the smallest character position N such that
-- s2 occurs in s1 at N and N >= pos1. If s2 does not
-- occur in s1 at or after pos1, s1.length is returned.
-- case => case of characters is significant

IsEmpty: PROC [r: ROPE] RETURNS [BOOL];
-- returns Length[r] = 0

Length: PROC [base: ROPE] RETURNS [INT];
-- returns the length of the rope (Length[NIL] = 0)

Replace: PROC
[base: ROPE, start: INT ← 0, len: INT ← MaxLen, with: ROPENIL]
RETURNS [ROPE];
-- returns rope with given range replaced by new
-- BoundsFault occurs if range invalid or result too long

Size: PROC [base: ROPE] RETURNS [INT];
-- Size[base] = Length[base]

Substr: PROC [base: ROPE, start: INT ← 0, len: INT ← MaxLen] RETURNS [ROPE];
-- returns a subrope of the base
-- BoundsFault occurs if the range given is not valid

-- character conversions (RRA sez: why are they here?)

Control: PROC [ch: CHAR] RETURNS [CHAR] = INLINE {
RETURN [IF ch IN ['A..'Z] THEN ch - controlOffset ELSE ch]
};

Upper: PROC [ch: CHAR] RETURNS [CHAR] = INLINE {
RETURN [IF ch IN ['a..'z] THEN ch - caseOffset ELSE ch]
};

Lower: PROC [ch: CHAR] RETURNS [CHAR] = INLINE {
RETURN [IF ch IN ['A..'Z] THEN ch + caseOffset ELSE ch]
};

Letter: PROC [ch: CHAR] RETURNS [BOOL] = INLINE {
RETURN [ch IN ['A..'Z] OR ch IN ['a..'z]]
};

Digit: PROC [ch: CHAR] RETURNS [BOOL] = INLINE {
RETURN [ch IN ['0..'9]]
};

-- *** Part 2: Extended operations and definitions *** --

Run: PROC
[s1: ROPE, pos1: INT ← 0, s2: ROPE, pos2: INT ← 0, case: BOOLTRUE]
RETURNS [INT];
-- Returns largest number of chars N such that s1 starting at pos1
-- is equal to s2 starting at pos2 for N chars. More formally:
-- FOR i IN [0..N): s1[pos1+i] = s2[pos2+i]
-- If case is true, then case matters.

Match: PROC [pattern, object: ROPE, case: BOOLTRUE] RETURNS [BOOL];
-- Returns TRUE iff object matches the pattern, where the pattern may contain
-- * to indicate that 0 or more characters will match.
-- If case is true, then case matters.

SkipTo: PROC [s: ROPE, pos: INT ← 0, skip: ROPE] RETURNS [INT];
-- Return the lowest position N in s such that s[N] is in the skip rope
-- and N >= pos. If no such character occurs in s, then return s.length.

SkipOver: PROC [s: ROPE, pos: INT ← 0, skip: ROPE] RETURNS [INT];
-- Return the lowest position N in s such that s[N] is NOT in the skip rope
-- and N >= pos. If no such character occurs in s, then return s.length.

Map: PROC
[base: ROPE, start: INT ← 0, len: INT ← MaxLen, action: ActionType]
RETURNS [BOOL];
-- Applies the action to the given range of characters in the rope
-- Returns TRUE when some action returns TRUE
-- BoundsFault occurs on attempting to fetch a character not in the rope

Translate: PROC
[base: ROPE, start: INT ← 0, len: INT ← MaxLen, translator: TranslatorType ← NIL]
RETURNS [new: ROPE];
-- applies the translation to get a new rope
-- if the resulting size > 0, then new does not share with the original rope!
-- if translator = NIL, the identity translation is performed

Flatten: PROC [base: ROPE, start: INT ← 0, len: INT ← MaxLen] RETURNS [Text];
-- Returns a flat rope from the given range of characters
-- BoundsFault occurs if the resulting length would be > LAST[NAT]

FromProc: PROC
[len: INT, p: PROC RETURNS [CHAR], maxPiece: INT ← MaxLen] RETURNS [ROPE];
-- returns a new rope given a proc to apply for each CHAR

FromRefText: PROC [s: REF READONLY TEXT] RETURNS [Text];
-- makes a rope from a REF READONLY TEXT
-- causes copying

ToRefText: PROC [base: ROPE] RETURNS [REF TEXT];
-- makes a REF TEXT from a rope
-- causes copying

FromChar: PROC [c: CHAR] RETURNS [Text];
-- makes a rope from a character

MakeRope: PROC
[base: REF, size: INT, fetch: FetchType, map: MapType ← NIL,
pieceMap: PieceMapType ← NIL] RETURNS [ROPE];
-- Returns a rope using user-supplied procedures and data
-- the user procedures MUST survive as long as the rope does!

PieceMap: PROC
[base: ROPE, start: INT ← 0, len: INT ← MaxLen, action: PieceActionType,
mapUser: BOOLTRUE] RETURNS [BOOL];
-- Applies the action to the given subrope in pieces (max of 1 piece/Substr,
-- 2 pieces/Concat, 3 pieces/Replace, either 1 piece/MakeRope or use user's
-- routine). Returns TRUE when some action returns TRUE.
-- BoundsFault occurs on attempting to fetch a character not in the rope

ContainingPiece: PROC
[ref: ROPE, index: INT ← 0] RETURNS [base: ROPE, start: INT, len: INT];
-- Find the largest piece containg the given index such that the result is
-- either a text or an object variant.
-- (NIL, 0, 0) is returned if the index is NOT in the given rope

Balance: PROC
[base: ROPE, start: INT ← 0, len: INT ← MaxLen, flat: INT ← FlatMax]
RETURNS [ROPE];
-- Returns a balanced rope, possibly with much copying of components
-- flat ← MIN[MAX[flat,FlatMax], LAST[NAT]]
-- len ← MIN[MAX[len,0], Size[base]-start]
-- start < 0 OR start > Size[base] => bounds fault
-- the resulting maxDepth will be limited by 2+log2[len/flat]

VerifyStructure: PROC [s: ROPE] RETURNS [leaves, nodes, maxDepth: INT];
-- traverse the structure of the given rope; return the number of leaves,
-- nodes and the max depth of the rope extra checking is performed to verify
-- invariants a leaf is a text or object variant a node is a non-NIL,
-- non-leaf variant shared leaves and nodes are multiply counted

VerifyFailed: ERROR;
-- occurs when VerifyStructure finds a bad egg
-- should not happen, of course

-- *** Part 3: Miscellaneous definitions *** --

controlOffset: NAT = 100B;
caseOffset: NAT = 'a - 'A;

FetchType: TYPE = PROC [data: REF, index: INT] RETURNS [CHAR];
-- type of fetch routine used to make a user rope

MapType: TYPE =
PROC [base: REF, start, len: INT, action: ActionType] RETURNS [quit: BOOLFALSE];
-- type of user routine used to map over a subrope
-- returns TRUE if some action returns TRUE

ActionType: TYPE = PROC [c: CHAR] RETURNS [quit: BOOLFALSE];
-- type of routine applied when mapping
-- returns TRUE to quit from Map

TranslatorType: TYPE = PROC [old: CHAR] RETURNS [new: CHAR];
-- type of routine supplied to Translate

PieceMapType: TYPE =
PROC [base: REF, start, len: INT, action: PieceActionType]
RETURNS [quit: BOOLFALSE];
-- type of routine used to piecewise map over a subrope
-- returns TRUE if some action returns TRUE

PieceActionType: TYPE =
PROC [base: ROPE, start: INT, len: INT] RETURNS [quit: BOOLFALSE];
-- type of routine applied when mapping pieces
-- returns TRUE to quit from PieceMap

RopeRep: PRIVATE TYPE =
RECORD
[SELECT tag: * FROM
text => [length: NAT, text: PACKED SEQUENCE max: CARDINAL OF CHAR],
node => [SELECT case: * FROM
substr => [size: INT, base: ROPE, start: INT, depth: INTEGER],
concat => [size: INT, base, rest: ROPE, pos: INT,
depth: INTEGER],
replace => [size: INT, base, replace: ROPE,
start, oldPos, newPos: INT, depth: INTEGER],
object => [size: INT, base: REF, fetch: FetchType,
map: MapType, pieceMap: PieceMapType]
ENDCASE]
ENDCASE];

MaxLen: INT = LAST[INT];
FlatMax: CARDINAL = 24;
Text: TYPE = REF TextRep; -- the small, flat variant handle
TextRep: TYPE = RopeRep[text]; -- useful for creating new text variants

END.


For those who care, this is the official explanation of the RopeRep variants:

Note: NIL is allowed as a valid ROPE.

Note: ALL integer components of the representation must be non-negative.

SELECT x: x FROM
text => {-- [0..x.length) is the range of char indexes
-- [0..x.max) is the number of chars of storage reserved
-- all Rope operations creating new text objects init x.length = x.max
-- x.length <= x.max is required
-- the bit pattern of the text IS IDENTICAL to TEXT and StringBody!!!!
  };
node =>
{SELECT x:x FROM
substr =>
  {-- [0..x.size) is the range of char indexes
-- x.base contains chars indexed by [0..x.size)
-- [0..x.size) in x ==> [x.start..x.start+x.size) in x.base
-- Size[x.base] >= x.start + x.base
  };
 concat =>
  {-- [0..x.size) is the range of char indexes
-- x.base contains chars indexed by [0..x.pos)
-- [0..x.pos) in x ==> [0..x.pos) in x.base
-- x.rest contains the chars indexed by [x.pos..x.size)
-- [x.pos..x.size) in x ==> [0..x.size-x.pos) in x.base
-- x.pos = Size[x.base] AND x.size = x.pos + Size[x.rest]
  };
 replace =>
  {-- [0..x.size) is the range of char indexes
-- x.base contains chars indexed by [0..x.start), [x.newPos..x.size)
-- [0..x.start) in x ==> [0..x.start) in x.base
-- [x.newPos..x.size) in x ==> [x.oldPos..Size[x.base]) in x.base
-- x.rest contains the chars indexed by [x.start..x.newPos)
-- [x.start..x.newPos) in x => [0..x.newPos-x.start) in x.base
-- x.size >= x.newPos >= x.start AND x.oldPos >= x.start
-- x.size - x.newPos = Size[x.base] - x.oldPos
  };
 object =>
  {-- [0..x.size) is the range of char indexes
-- x.base if the data needed by the user-supplied operations
-- x.fetch[x.base, i] should fetch the ith char AND x.fetch # NIL
-- x.map[x.base, st, len, action]
-- implements Map[x, st, len, action]
-- x.pieceMap[x.base, st, len, action] should behave
-- implements PieceMap[x, st, len, action, TRUE]
-- it is OK to have x.map = NIL OR x.pieceMap = NIL
  };
ENDCASE => ERROR NoRope}
ENDCASE => ERROR NoRope};