Types
ROPE: TYPE = Rope.ROPE;
Offset: TYPE = TextNode.Offset;
CharClassContent: TYPE = PACKED ARRAY LegalCharacters OF BOOL;
CharClass: TYPE = REF CharClassContent;
Index: TYPE = NAT;
IgnoreLooks: Tioga.Looks = Tioga.allLooks;
ParseTypes:
TYPE = {
concat,
char, charIC, class, anyChar,
nodeBreak, skipToNodeBreak,
beginNode, skipToBeginNode,
fail, succeed, noOp,
skipOverClass, skipOverChar, skipOverCharIC, skipOver,
skipToChar, skipToCharIC, skipToString, skipTo,
skipToEnd, skipToBeginning,
closure, carefulClosure, greedyClosure, carefulGreedyClosure, alt,
field, fieldEquals, fieldEqualsIC, beginAll, endAll, beginALL, endALL};
ParseTreeContent:
TYPE =
RECORD[
SELECT type: ParseTypes
FROM
concat => [concats: LIST OF ParseTree],
char, charIC => [char: CHAR, looks: Tioga.Looks],
class => [class: CharClass, looks: Tioga.Looks, classNumber: Index],
anyChar => [looks: Tioga.Looks],
nodeBreak => NULL,
skipToNodeBreak => [looks: Tioga.Looks],
beginNode => NULL,
skipToBeginNode => NULL,
fail, succeed, noOp => NULL,
skipOverClass => [class: CharClass, looks: Tioga.Looks, classNumber: Index],
skipOverChar, skipOverCharIC => [char: CHAR, looks: Tioga.Looks],
skipOver => [p: ParseTree],
skipToChar, skipToCharIC => [char: CHAR, looks: Tioga.Looks],
skipToString => [p: ParseTree],
skipTo => [p: ParseTree],
skipToEnd, skipToBeginning => NULL,
closure, carefulClosure, greedyClosure, carefulGreedyClosure => [p: ParseTree],
alt => [alts: LIST OF ParseTree],
field => [number: Index, bound: INT, p: ParseTree],
fieldEquals, fieldEqualsIC => [number: Index],
beginAll, endAll, beginALL, endALL => NULL
ENDCASE
];
ParseTree:
TYPE =
REF ParseTreeContent;
EightBit: CHAR = 200C;
CharMask: CHAR = 177C;
LegalCharacters: TYPE = CHAR[0C..177C];
LegalInputCharacters: TYPE = CHAR[1C..177C];
beginClassToken: CHAR = EightBit + 1; -- [
endClassToken: CHAR = EightBit + 2; -- ]
notToken: CHAR = EightBit + 3; -- ~
anyToken: CHAR = EightBit + 4; -- #
nodeBreakToken: CHAR = EightBit + 5; -- $
closureToken: CHAR = EightBit + 6; -- *
greedyClosureToken: CHAR = EightBit + 7; -- **
plusToken: CHAR = EightBit + 8; -- +
greedyPlusToken: CHAR = EightBit + 9; -- ++
beginAltToken: CHAR = EightBit + 10; -- (
endAltToken: CHAR = EightBit + 11; -- )
altSepToken: CHAR = EightBit + 12; -- |
beginFieldToken: CHAR = EightBit + 13; -- <
endFieldToken: CHAR = EightBit + 14; -- >
fieldSepToken: CHAR = EightBit + 15; -- :
boundSepToken: CHAR = EightBit + 16; -- ,
endPatternToken: CHAR = EightBit + 17; -- end of Pattern.
beginAllToken: CHAR = EightBit + 19; -- {
endAllToken: CHAR = EightBit + 20; -- }
subRangeToken: CHAR = EightBit + 21; -- ..
beginNodeToken: CHAR = EightBit +22; --
powerToken: CHAR = EightBit +23; -- !
OpCode:
TYPE = {
matchChar, matchCharIC, matchCharLooks, matchCharLooksIC,
matchClass, matchClassLooks,
matchAnyChar, matchAnyCharLooks,
matchNodeBreak, skipToNodeBreak, skipToNodeBreakLooks,
matchBeginNode,
fail, succeed, noOp,
skipOverClass, skipOverClassLooks,
skipOverChar, skipOverCharLooks,
skipOverCharIC, skipOverCharLooksIC,
skipToChar, skipToCharLooks,
skipToCharIC, skipToCharLooksIC,
skipToString, endOfSkipToString,
skipToEnd, skipToBeginning,
closure, carefulClosure, carefulClosureEnd,
greedyClosure, carefulGreedyClosure, carefulGreedyClosureEnd, alt,
fieldStart, fieldEnd, boundNodeBreaks,
fieldEquals, fieldEqualsLooks, fieldEqualsIC, fieldEqualsLooksIC,
beginAll, endAll
};
ReturnCode:
TYPE = {skipToStringRet, closureRet, carefulClosureRet, greedyClosureRet, carefulGreedyClosureRet, altRet};
PatternPC: TYPE = NAT;
TextStackArray: TYPE = RECORD[stack: SEQUENCE length: NAT OF Offset];
PatternStackArray: TYPE = RECORD[stack: SEQUENCE length: NAT OF PatternPC];
PatternOpCodeArray: TYPE = RECORD[opCodes: SEQUENCE length: NAT OF OpCode];
PatternDataArray: TYPE = RECORD[opCodes: SEQUENCE length: NAT OF NAT];
PatternNextArray: TYPE = RECORD[opCodes: SEQUENCE length: NAT OF PatternPC];
PatternLooksArray: TYPE = RECORD[looks: SEQUENCE length: NAT OF Tioga.Looks];
ClassArray: TYPE = RECORD[classes: SEQUENCE length: NAT OF CharClass];
ReturnCodeArray: TYPE = RECORD[returnCodes: SEQUENCE length: NAT OF ReturnCode];
NameArray: TYPE = RECORD [array: SEQUENCE length: NAT OF NameRec];
NameRec: TYPE = RECORD [name: ROPE, at, atEnd: Offset, looks: Tioga.Looks];
CodeContent:
TYPE =
RECORD[
opCodes: REF PatternOpCodeArray,
looks: REF PatternLooksArray,
data: REF PatternDataArray,
next: REF PatternNextArray
];
Code: TYPE = REF CodeContent;
StackContent:
TYPE =
RECORD[
pos: Index,
pc: REF PatternStackArray,
nextPos: REF TextStackArray,
returnCode: REF ReturnCodeArray
];
Stack: TYPE = REF StackContent;
FinderRecord:
TYPE =
RECORD [
stack: Stack,
forwardProgram: Code,
backwardProgram: Code,
classes: REF ClassArray,
nameArray: REF NameArray,
nodeReader: NodeReader.Ref,
doLooks: BOOL ¬ FALSE,
wordSearch: BOOL ¬ FALSE
];
Finder:
TYPE =
REF FinderRecord;
Operations stolen from TextFind, exported from RegExpFind2Impl
SearchRope:
PROC [finder: Finder, rope:
ROPE, start:
INT ¬ 0, len:
INT ¬
LAST[
INT],
interrupt:
REF
BOOL ¬
NIL]
RETURNS [found:
BOOL, at, atEnd, before, after:
INT];
Try:
PROC [finder: Finder, text: Tioga.Node,
start:
INT ¬ 0, len:
INT ¬
LAST[
INT], looksExact:
BOOL ¬
FALSE,
interrupt:
REF
BOOL ¬
NIL]
RETURNS [found:
BOOL, at, atEnd, before, after:
INT];
searches text for first match in [start..start+len)
i.e., searches up from start until reaches start+len
if finds a match, returns with found = true,
at = beginning of match or location corresponding to { in pattern
atEnd = end of match or location corresponding to } in pattern
after = end of entire match, which may be > atEnd if used } in pattern
before = start of entire match, which may be < at if used { in pattern
if looksExact is true, then match by equality, else by subset
if interrupt^ becomes true, will stop searching
SearchRopeBackwards:
PROC [finder: Finder, rope:
ROPE, start:
INT ¬ 0, len:
INT ¬
LAST[
INT], interrupt:
REF
BOOL ¬
NIL]
RETURNS [found:
BOOL, at, atEnd, before, after:
INT];
TryBackwards:
PROC [finder: Finder, text: Tioga.Node,
start:
INT ¬ 0, len:
INT ¬
LAST[
INT], looksExact:
BOOL ¬
FALSE,
interrupt:
REF
BOOL ¬
NIL]
RETURNS [found:
BOOL, at, atEnd, before, after:
INT];
searches text for last match in [start..start+len)
i.e., searches down from start+len until reaches start
}.