RegularExpression.mesa
Copyright © 1985 by Xerox Corporation. All rights reserved.
Nix, December 20, 1983 2:39 pm
Russ Atkinson (RRA) April 25, 1985 5:26:16 am PST
DIRECTORY
TextLooks USING [Looks, Runs],
RunReader USING [Ref],
LooksReader USING [Ref],
TextNode USING [Offset, RefTextNode],
Rope USING [ROPE],
RopeReader USING [Ref];
RegularExpression: CEDAR DEFINITIONS = {
Operations & types stolen from TextFind
MalformedPattern: ERROR [ec: PatternErrorCode];
PatternErrorCode: TYPE = {
toobig, -- pattern too long
endquote, -- pattern ends with '
endtilda, -- pattern ends with ~
boundary, -- pattern has | inside rather than at beginning or end
missingNameEnd, -- pattern has < without matching >
unmatchedNameEnd -- pattern has > without previous <
};
RefTextNode: TYPE = TextNode.RefTextNode;
Operations stolen from TextFind, exported from RegExpFindImpl
Create: PROC [pattern: RefTextNode,
literal, word, ignoreLooks, ignoreCase, addBounds: BOOLFALSE,
patternStart: INT ← 0, patternLen: INTLAST[INT]] RETURNS [finder: Finder];
creates a record containing a "compiled" version of the pattern
if literal is true, each character in pattern is taken literally
if word flag is true, pattern only matches if no adjacent letters or digits
if ignoreLooks is true, then ignore the looks of the pattern characters
if ignoreCase is false, then alpha chars in pattern must match case
otherwise, they can match either upper or lower
if addBounds is true, add |'s to both ends of pattern
CreateFromRope: PROC [pattern: ROPE,
literal, word, ignoreCase, addBounds: BOOLFALSE,
patternStart: INT ← 0, patternLen: INTLAST[INT]] RETURNS [finder: Finder];
NameLooks: PROC [finder: Finder, name: ROPE] RETURNS [looks: TextLooks.Looks];
name is the name of a subpattern
value are looks that name had in the pattern
NameLoc: PROC [finder: Finder, name: ROPE] RETURNS [at, atEnd: INT];
name is the name of a subpattern
value is where that subpattern matched last time
Operations stolen from TextFind, exported from RegExpFind2Impl
SearchRope: PROC [finder: Finder, rope: ROPE, start: INT ← 0, len: INTLAST[INT],
interrupt: REF BOOLNIL] RETURNS [found: BOOL, at, atEnd, before, after: INT];
Try: PROC [finder: Finder, text: RefTextNode,
start: INT ← 0, len: INTLAST[INT], looksExact: BOOLFALSE,
interrupt: REF BOOLNIL] RETURNS [found: BOOL, at, atEnd, before, after: INT];
searches text for first match in [start..start+len)
i.e., searches up from start until reaches start+len
if finds a match, returns with found = true,
at = beginning of match or location corresponding to { in pattern
atEnd = end of match or location corresponding to } in pattern
after = end of entire match, which may be > atEnd if used } in pattern
before = start of entire match, which may be < at if used { in pattern
if looksExact is true, then match by equality, else by subset
if interrupt^ becomes true, will stop searching
SearchRopeBackwards: PROC [finder: Finder, rope: ROPE, start: INT ← 0, len: INTLAST[INT], interrupt: REF BOOLNIL] RETURNS [found: BOOL, at, atEnd, before, after: INT];
TryBackwards: PROC [finder: Finder, text: RefTextNode,
start: INT ← 0, len: INTLAST[INT], looksExact: BOOLFALSE,
interrupt: REF BOOLNIL] RETURNS [found: BOOL, at, atEnd, before, after: INT];
searches text for last match in [start..start+len)
i.e., searches down from start+len until reaches start
Operations
Search: PUBLIC PROC [finder: Finder, rope: ROPE, runs: TextLooks.Runs, start: Offset, len: Offset, looksExact: BOOL, interrupt: REF BOOLNIL] RETURNS [found: BOOL, at, atEnd, before, after: Offset];
OptimizeForwardSearch: PROC[p: ParseTree, ccl: LIST OF ParseTree, numCcl: Index] RETURNS [np: ParseTree, newCcl: LIST OF ParseTree, newNumCcl: Index];
OptimizeBackwardSearch: PROC[p: ParseTree, ccl: LIST OF ParseTree, numCcl: Index] RETURNS [np: ParseTree, newCcl: LIST OF ParseTree, newNumCcl: Index];
Compile: PROC [p: ParseTree] RETURNS [c: Code];
Types
ROPE: TYPE = Rope.ROPE;
Offset: TYPE = TextNode.Offset;
CharClassContent: TYPE = PACKED ARRAY LegalCharacters OF BOOL;
CharClass: TYPE = REF CharClassContent;
Index: TYPE = NAT;
IgnoreLooks: TextLooks.Looks = ALL[TRUE];  -- A bogus definition, but it'll work.
ParseTypes: TYPE = {
concat,
char, charIC, class, anyChar,
nodeBreak, skipToNodeBreak,
beginNode, skipToBeginNode,
fail, succeed, noOp,
skipOverClass, skipOverChar, skipOverCharIC, skipOver,
skipToChar, skipToCharIC, skipToString, skipTo,
skipToEnd, skipToBeginning,
closure, carefulClosure, greedyClosure, carefulGreedyClosure, alt,
field, fieldEquals, fieldEqualsIC, beginAll, endAll, beginALL, endALL};
ParseTreeContent: TYPE = RECORD[
SELECT type: ParseTypes FROM
concat => [concats: LIST OF ParseTree],
char, charIC => [char: CHAR, looks: TextLooks.Looks],
class => [class: CharClass, looks: TextLooks.Looks, classNumber: Index],
anyChar => [looks: TextLooks.Looks],
nodeBreak => NULL,
skipToNodeBreak => [looks: TextLooks.Looks],
beginNode => NULL,
skipToBeginNode => NULL,
fail, succeed, noOp => NULL,
skipOverClass => [class: CharClass, looks: TextLooks.Looks, classNumber: Index],
skipOverChar, skipOverCharIC => [char: CHAR, looks: TextLooks.Looks],
skipOver => [p: ParseTree],
skipToChar, skipToCharIC => [char: CHAR, looks: TextLooks.Looks],
skipToString => [p: ParseTree],
skipTo => [p: ParseTree],
skipToEnd, skipToBeginning => NULL,
closure, carefulClosure, greedyClosure, carefulGreedyClosure => [p: ParseTree],
alt => [alts: LIST OF ParseTree],
field => [number: Index, bound: INT, p: ParseTree],
fieldEquals, fieldEqualsIC => [number: Index],
beginAll, endAll, beginALL, endALL => NULL
ENDCASE
];
ParseTree: TYPE = REF ParseTreeContent;
EightBit: CHAR = 200C;
CharMask: CHAR = 177C;
LegalCharacters: TYPE = CHAR[0C..177C];
LegalInputCharacters: TYPE = CHAR[1C..177C];
beginClassToken: CHAR = EightBit + 1; -- [
endClassToken: CHAR = EightBit + 2;  -- ]
notToken: CHAR = EightBit + 3;  -- ~
anyToken: CHAR = EightBit + 4;  -- #
nodeBreakToken: CHAR = EightBit + 5; -- $
closureToken: CHAR = EightBit + 6;  -- *
greedyClosureToken: CHAR = EightBit + 7; -- **
plusToken: CHAR = EightBit + 8;  -- +
greedyPlusToken: CHAR = EightBit + 9; -- ++
beginAltToken: CHAR = EightBit + 10; -- (
endAltToken: CHAR = EightBit + 11;  -- )
altSepToken: CHAR = EightBit + 12;  -- |
beginFieldToken: CHAR = EightBit + 13; -- <
endFieldToken: CHAR = EightBit + 14; -- >
fieldSepToken: CHAR = EightBit + 15;  -- :
boundSepToken: CHAR = EightBit + 16; -- ,
endPatternToken: CHAR = EightBit + 17; -- end of Pattern.
beginAllToken: CHAR = EightBit + 19; -- {
endAllToken: CHAR = EightBit + 20;  -- }
subRangeToken: CHAR = EightBit + 21; -- ..
beginNodeToken: CHAR = EightBit +22; -- ^
powerToken: CHAR = EightBit +23;  -- !
OpCode: TYPE = {
matchChar, matchCharIC, matchCharLooks, matchCharLooksIC,
matchClass, matchClassLooks,
matchAnyChar, matchAnyCharLooks,
matchNodeBreak, skipToNodeBreak, skipToNodeBreakLooks,
matchBeginNode,
fail, succeed, noOp,
skipOverClass, skipOverClassLooks,
skipOverChar, skipOverCharLooks,
skipOverCharIC, skipOverCharLooksIC,
skipToChar, skipToCharLooks,
skipToCharIC, skipToCharLooksIC,
skipToString, endOfSkipToString,
skipToEnd, skipToBeginning,
closure, carefulClosure, carefulClosureEnd,
greedyClosure, carefulGreedyClosure, carefulGreedyClosureEnd, alt,
fieldStart, fieldEnd, boundNodeBreaks,
fieldEquals, fieldEqualsLooks, fieldEqualsIC, fieldEqualsLooksIC,
beginAll, endAll
};
ReturnCode: TYPE = {skipToStringRet, closureRet, carefulClosureRet, greedyClosureRet, carefulGreedyClosureRet, altRet};
PatternPC: TYPE = NAT;
TextStackArray: TYPE = RECORD[stack: SEQUENCE length: NAT OF Offset];
PatternStackArray: TYPE = RECORD[stack: SEQUENCE length: NAT OF PatternPC];
PatternOpCodeArray: TYPE = RECORD[opCodes: SEQUENCE length: NAT OF OpCode];
PatternDataArray: TYPE = RECORD[opCodes: SEQUENCE length: NAT OF NAT];
PatternNextArray: TYPE = RECORD[opCodes: SEQUENCE length: NAT OF PatternPC];
PatternLooksArray: TYPE = RECORD[looks: SEQUENCE length: NAT OF TextLooks.Looks];
ClassArray: TYPE = RECORD[classes: SEQUENCE length: NAT OF CharClass];
ReturnCodeArray: TYPE = RECORD[returnCodes: SEQUENCE length: NAT OF ReturnCode];
NameArray: TYPE = RECORD [array: SEQUENCE length: NAT OF NameRec];
NameRec: TYPE = RECORD [name: ROPE, at, atEnd: Offset, looks: TextLooks.Looks];
CodeContent: TYPE = RECORD[
opCodes: REF PatternOpCodeArray,
looks: REF PatternLooksArray,
data: REF PatternDataArray,
next: REF PatternNextArray
];
Code: TYPE = REF CodeContent;
StackContent: TYPE = RECORD[
pos: Index,
pc: REF PatternStackArray,
nextPos: REF TextStackArray,
returnCode: REF ReturnCodeArray
];
Stack: TYPE = REF StackContent;
FinderRecord: TYPE = RECORD [
stack: Stack,
forwardProgram: Code,
backwardProgram: Code,
classes: REF ClassArray,
nameArray: REF NameArray,
ropeReader: RopeReader.Ref,
lksReader: LooksReader.Ref,
runReader: RunReader.Ref,
wordSearch: BOOLFALSE
];
Finder: TYPE = REF FinderRecord;
}.