RegExpFindImpl.Mesa
Copyright © 1985 by Xerox Corporation. All rights reserved.
derived from Paxton's TextFindImpl of Tioga
which was derived from EditFind.Mesa of Laurel 6
Nix, December 21, 1983 4:22 pm
Russ Atkinson (RRA) April 25, 1985 5:18:31 am PST
Peter Kessler September 30, 1986 12:40:58 pm PDT
DIRECTORY
Ascii USING [Lower, Upper],
List USING [Append, Nconc, Reverse],
LooksReader USING [Create, Get, SetPosition],
RegularExpression USING [altSepToken, anyToken, beginAllToken, beginAltToken, beginClassToken, beginFieldToken, beginNodeToken, boundSepToken, CharClass, CharClassContent, ClassArray, closureToken, Compile, endAllToken, endAltToken, endClassToken, endFieldToken, endPatternToken, fieldSepToken, FinderRecord, greedyClosureToken, greedyPlusToken, IgnoreLooks, Index, LegalInputCharacters, NameArray, nodeBreakToken, notToken, OptimizeBackwardSearch, OptimizeForwardSearch, ParseTree, ParseTreeContent, ParseTypes, PatternErrorCode, PatternStackArray, plusToken, powerToken, ReturnCodeArray, StackContent, subRangeToken, TextStackArray],
Rope USING [Concat, Equal, Fetch, FromChar, ROPE, Size],
RopeReader USING [Create, Get, GetIndex, ReadOffEnd, SetPosition],
RunReader USING [NoMoreRuns],
RuntimeError USING [BoundsFault],
TextEdit USING [GetRope, GetRuns],
TextLooks USING [Looks, noLooks, Runs],
TextNode USING [Offset, RefTextNode];
RegExpFindImpl: CEDAR PROGRAM
IMPORTS Ascii, List, LooksReader, RegularExpression, Rope, RopeReader, RunReader, RuntimeError, TextEdit
EXPORTS RegularExpression = {
OPEN RegularExpression;
RegExpPatternErrorCode: TYPE = {
tooBig,  -- The pattern is too big.
illegalCharacter,
improperAltSeparator,
notInsideAlt,
notInsideField,
moreThanOneBeginAll,
noMatchingBeginAll,
nameMustBeAString,
theAllNameIsReserved,
secondOccurenceOfFieldMustNotContainPattern,
expectedEndOfField,
unexpectedEndOfPattern,
noClosingEndAll,
invalidNot,
illegalOctalNumber,
unknownAbbreviation
};
MalformedPattern: PUBLIC ERROR [ec: RegularExpression.PatternErrorCode] = CODE;
MaxLen: Offset = LAST[Offset];
Finder: TYPE = REF FinderRec;
FinderRec: PUBLIC TYPE = FinderRecord;
Offset: TYPE = TextNode.Offset;
ROPE: TYPE = Rope.ROPE;
RefTextNode: TYPE = TextNode.RefTextNode;
***** Operations *****
NameLoc: PUBLIC PROC [finder: Finder, name: ROPE] RETURNS [at, atEnd: Offset] = {
nameArray: REF NameArray ← IF finder # NIL THEN finder.nameArray ELSE NIL;
at ← atEnd ← 0;
IF nameArray = NIL THEN RETURN;
IF Rope.Equal["all", name, FALSE] THEN RETURN [nameArray[0].at, nameArray[0].atEnd];
FOR i:NAT IN [0..nameArray.length) DO
IF Rope.Equal[nameArray[i].name, name] THEN
RETURN [nameArray[i].at, nameArray[i].atEnd];
ENDLOOP;
};
NameLooks: PUBLIC PROC [finder: Finder, name: ROPE] RETURNS [looks: TextLooks.Looks] = {
nameArray: REF NameArray ← IF finder # NIL THEN finder.nameArray ELSE NIL;
looks ← TextLooks.noLooks;
IF nameArray = NIL THEN RETURN;
IF Rope.Equal["all", name, FALSE] THEN RETURN [nameArray[0].looks];
FOR i:NAT IN [0..nameArray.length) DO
IF Rope.Equal[nameArray[i].name, name] THEN RETURN [nameArray[i].looks];
ENDLOOP;
};
Create: PUBLIC PROC [pattern: RefTextNode, literal, word, ignoreLooks, ignoreCase, addBounds: BOOLEAN, patternStart: INT ← 0, patternLen: INTLAST[INT]] RETURNS [finder: Finder] = {
patternRope: ROPE ← TextEdit.GetRope[pattern];
patternRuns: TextLooks.Runs ← TextEdit.GetRuns[pattern];
RETURN [CreateFromParts[patternRope,patternRuns,literal,word,
ignoreLooks,ignoreCase,addBounds,patternStart,patternLen]];
}; 
CreateFromRope: PUBLIC PROC [pattern: ROPE, literal, word, ignoreCase, addBounds: BOOLEAN, patternStart: Offset, patternLen: Offset] RETURNS [finder: Finder] = {
RETURN [CreateFromParts[pattern,NIL,literal,word,TRUE,
ignoreCase,addBounds,patternStart,patternLen]]
};
CreateFromParts: PROC [patternRope: ROPE, patternRuns: TextLooks.Runs, literal, word, ignoreLooks, ignoreCase, addBounds: BOOLEAN, patternStart: Offset, patternLen: Offset] RETURNS [finder: Finder] = {
ENABLE RuntimeError.BoundsFault => ERROR MalformedPattern[toobig];
SimpleSymbolTableEntry: TYPE = RECORD[name: ROPE, number: Index];
SimpleSymbolTable: TYPE = LIST OF REF SimpleSymbolTableEntry;
nameList: SimpleSymbolTable;
numberOfFields: Index;
parsedPatternList: LIST OF ParseTree;
forwardPattern, backwardPattern: ParseTree;
char, patternChar: CHAR ← 377C;
pEnd, pPos: Offset;
numCharClasses: Index ← 0;
charClassList: LIST OF ParseTree ← NIL;
insideNamedPat: BOOLEANFALSE;
lastPhysicalCharUnread: BOOLFALSE;
theLastPhysicalCharUnread: CHAR;
inAbbreviation: BOOLFALSE;
abbreviationPos: Offset;
abbreviation: ROPE;
lastCharacterRead: CHAR;
looksRead: TextLooks.Looks ← TextLooks.noLooks;
GetNextChar: PROC [eofOK: BOOL] RETURNS [c: CHAR] = {
IF lastPhysicalCharUnread THEN {
lastPhysicalCharUnread ← FALSE;
RETURN[theLastPhysicalCharUnread];
};
IF inAbbreviation THEN {
abbreviationPos ← abbreviationPos + 1;
IF abbreviationPos < abbreviation.Size[]
THEN c ← abbreviation.Fetch[abbreviationPos]
ELSE inAbbreviation ← FALSE;
};
IF ~inAbbreviation THEN {
IF pPos >= pEnd THEN GOTO gotEnd;
pPos ← pPos + 1;
c ← finder.ropeReader.Get[! RopeReader.ReadOffEnd => GOTO gotEnd];
IF finder.lksReader = NIL
THEN looksRead ← IgnoreLooks
ELSE looksRead ← LooksReader.Get[finder.lksReader
! RunReader.NoMoreRuns => {looksRead ← TextLooks.noLooks; CONTINUE }];
};
IF ignoreCase THEN c ← Ascii.Upper[c];
EXITS
gotEnd => {
c ← endPatternToken;
looksRead ← TextLooks.noLooks;
IF ~eofOK THEN SyntaxError[unexpectedEndOfPattern];
};
};
UnReadLastPhysicalChar: PROC [c: CHAR] = {
IF lastPhysicalCharUnread THEN ERROR;
theLastPhysicalCharUnread ← c;
lastPhysicalCharUnread ← TRUE;
};
AbbreviationRec: TYPE = RECORD[char: CHAR, abbreviation: ROPE];
abbreviations: LIST OF AbbreviationRec ← LIST[
['A, "([a..zA..Z0..9]++)"],
['B, "([ '011..'015]++)"],
['D, "([0..9]+.[0..9]**|[0..9]*.[0..9]++|[0..9]++)"],
['N, "('015)"],
['Q, "(\"[~\"]*\"|``[~'']*''''|`[~'']*'')"],
['S, "([~ '011..'015]++)"],
['W, "([a..zA..Z]++)"],
['^, "['001..'037]"]
];
SetUpAbbreviation: PROC [c: CHAR] = {
IF inAbbreviation THEN ERROR;
c ← Ascii.Upper[c];
FOR l: LIST OF AbbreviationRec ← abbreviations, l.rest UNTIL l = NIL DO
IF c = l.first.char THEN {
inAbbreviation ← TRUE;
abbreviationPos ← -1;
abbreviation ← l.first.abbreviation;
RETURN;
};
ENDLOOP;
SyntaxError[unknownAbbreviation];
};
charUnRead: BOOLFALSE;
GetToken: PROC [] RETURNS [char: CHAR] = {
IF charUnRead THEN {
charUnRead ← FALSE;
RETURN[lastCharacterRead];
};
lastCharacterRead ← char ← GetNextChar[TRUE];
IF literal THEN
IF char = endPatternToken OR char IN LegalInputCharacters
THEN RETURN[char] ELSE SyntaxError[illegalCharacter];
IF char IN ['A..'Z] OR char IN ['a..'z] OR char IN ['0..'9] THEN RETURN[char];
SELECT char FROM
'[ => char ← beginClassToken;
'] => char ← endClassToken;
'~ => char ← notToken;
'# => char ← anyToken;
'$ => char ← nodeBreakToken;
'^ => char ← beginNodeToken;
'! => char ← powerToken;
'( => char ← beginAltToken;
') => char ← endAltToken;
'| => char ← altSepToken;
'< => char ← beginFieldToken;
'> => char ← endFieldToken;
': => char ← fieldSepToken;
', => char ← boundSepToken;
'{ => char ← beginAllToken;
'} => char ← endAllToken;
'' => {
char ← GetNextChar[FALSE];
IF char IN ['0..'7] THEN {
c2: CHAR ← GetNextChar[FALSE];
c3: CHAR ← GetNextChar[FALSE];
octalIndex: CARDINAL;
IF ~(c2 IN ['0..'7]) OR ~(c3 IN ['0..'7]) THEN
SyntaxError[illegalOctalNumber];
octalIndex ← (char-'0)*64 + (c2-'0)*8+(c3-'0);
IF octalIndex > 127 THEN SyntaxError[illegalOctalNumber];
char ← VAL[octalIndex];
};
IF ~(char IN LegalInputCharacters) THEN SyntaxError[illegalCharacter];
};
'* => {
c: CHAR ← GetNextChar[TRUE];
IF c = '*
THEN
char ← greedyClosureToken
ELSE {
UnReadLastPhysicalChar[c];
char ← closureToken;
};
};
'+ => {
c: CHAR ← GetNextChar[TRUE];
IF c = '+
THEN
char ← greedyPlusToken
ELSE {
UnReadLastPhysicalChar[c];
char ← plusToken;
};
};
'. => {
c: CHAR ← GetNextChar[TRUE];
IF c = '.
THEN
char ← subRangeToken
ELSE {
UnReadLastPhysicalChar[c];
char ← '.;
};
};
'\\ => {
c: CHAR ← GetNextChar[FALSE];
SetUpAbbreviation[c];
RETURN[GetToken[]];
};
endPatternToken => NULL;
ENDCASE => IF ~(char IN LegalInputCharacters) THEN SyntaxError[illegalCharacter];
lastCharacterRead ← char;
RETURN[char];
};
UnReadToken: PROC[] = {
IF charUnRead THEN ERROR;
charUnRead ← TRUE;
};
SyntaxError: PROC[kind: RegExpPatternErrorCode] = {
ERROR MalformedPattern[toobig];
MalformedPattern[kind, MAX[0, IF lastPhysicalCharUnread THEN pPos-2 ELSE pPos-1]];
};
Parses the character class notation, which in its simplest form is a sequence of characters between []'s, e.g. [0123456789], which specifies a pattern that will match any character in the sequence. Ranges of characters may be specified with .., e.g. [A..F] is the same as [ABCDEF]. Special characters can and must be quoted, e.g. ['(')''] will match either a left parenthesis, a right parenthesis, or a quote. All characters but those in the class may be matched by using a ~ as the first symbol in the class, e.g. [~A..Za..z0..9] will match all but the alphanumeric characters. If ignoreCase is true, then if the set includes a lower case 'x, it will be made to also include the upper case 'X, and vice-versa.
ParseCharClass: PROC [] RETURNS [r: ParseTree] = {
ccr: REF ParseTreeContent.class ← NEW[ParseTreeContent.class];
complement: BOOLFALSE;
c, lastChar: CHAR ← beginClassToken;
charClass: CharClass ← NEW[CharClassContent ← ALL[FALSE]];
charClassList ← CONS[ccr, charClassList];
ccr.classNumber ← numCharClasses;
numCharClasses ← numCharClasses + 1;
IF GetToken[] = notToken THEN
complement ← TRUE
ELSE
UnReadToken[];
ccr.looks ← looksRead;
ccr.class ← charClass;
r ← ccr;
WHILE (c ← GetToken[]) # endClassToken DO
SELECT c FROM
subRangeToken => {
IF ~(lastChar IN LegalInputCharacters) THEN SyntaxError[illegalCharacter];
c ← GetToken[];
IF ~(c IN LegalInputCharacters) THEN SyntaxError[illegalCharacter];
FOR x: CHAR IN [lastChar..c] DO
charClass[x] ← TRUE;
ENDLOOP;
lastChar ← subRangeToken;
};
NOT IN LegalInputCharacters =>
SyntaxError[illegalCharacter]
ENDCASE => {
lastChar ← c;
charClass[c] ← TRUE;
};
ENDLOOP;
IF ignoreCase THEN
FOR x: CHAR IN ['a..'z] DO
SELECT TRUE FROM
charClass[x] => charClass[x-'a+'A] ← TRUE;
charClass[x-'a+'A] => charClass[x] ← TRUE;
ENDCASE => NULL;
ENDLOOP;
IF complement THEN
FOR x: CHAR IN LegalInputCharacters DO
charClass[x] ← ~charClass[x];
ENDLOOP;
};
X ::=
non-special character Not one of '[]~#$*+(){}<>\ .. 
'special character One of the above. (Handled in tokenizer)
'nnn  octal control characters. (Handled in tokenizer)
[character class] Character class notation, A..Z means ASCII interval A..Z
[~character class] Not the characters in this class.
#  Any character.
$  Node break.
^  Beginning of node.
\x  Predefined patterns; x is an alphanumeric character, or <string>.
   (Handled in tokenizer)
ParseX: PROC [] RETURNS [p: ParseTree ← NIL] = {
c: CHAR ← GetToken[];
SELECT c FROM
IN LegalInputCharacters =>
IF ignoreCase AND c IN ['A..'Z] THEN
p ← NEW[ParseTreeContent.charIC ← [charIC[c, looksRead]]]
ELSE
p ← NEW[ParseTreeContent.char ← [char[c, looksRead]]];
beginClassToken =>
p ← ParseCharClass[];
anyToken =>
p ← NEW[ParseTreeContent.anyChar ← [anyChar[looksRead]]];
nodeBreakToken =>
p ← NEW[ParseTreeContent.nodeBreak];
beginNodeToken =>
p ← NEW[ParseTreeContent.beginNode];
ENDCASE => UnReadToken[];
};
Z ::=
X A single-character pattern.
(P|P|...|P) Alternation.
ParseZ: PROC [] RETURNS [p: ParseTree ← NIL] = {
c: CHAR ← GetToken[];
SELECT c FROM
beginAltToken => {
l: LIST OF ParseTree ← NIL;
DO
q: ParseTree ← ParseP[];
IF q = NIL THEN q ← NEW[ParseTreeContent.noOp];
l ← CONS[q, l];
c ← GetToken[];
SELECT c FROM
endAltToken => EXIT;
altSepToken => {};
ENDCASE => SyntaxError[improperAltSeparator];
ENDLOOP;
SELECT TRUE FROM
l = NIL => RETURN [NIL];
l.rest = NIL => RETURN [l.first];
ENDCASE => TRUSTED {
RETURN[NEW[ParseTreeContent.alt ← [alt[LOOPHOLE[List.Reverse[LOOPHOLE[l]]]]]]];
};
};
endAltToken, altSepToken => {
UnReadToken[];
RETURN[NIL]
}
ENDCASE => {
UnReadToken[];
p ← ParseX[];
};
};
The pattern matching fragments for full patterns.
P ::=
null The empty pattern.
Z       
Z*P Min-matching closure.
Z**P Greedy closure.
Z+P PP*
Z++P PP**
~ZP Deterministically match anything up to but not including P.
ParseP: PROC [] RETURNS [ParseTree] = {
l: LIST OF ParseTree ← NIL;
p: ParseTree;
DO
c: CHAR ← GetToken[];
IF c = notToken
THEN
p ← NEW[ParseTreeContent.skipTo ← [skipTo[ParseZ[]]]]
ELSE {
UnReadToken[];
p ← ParseZ[];
IF p = NIL THEN EXIT;
c ← GetToken[];
SELECT c FROM
closureToken =>
p ← NEW[ParseTreeContent.closure ← [closure[p]]];
greedyClosureToken =>
p ← NEW[ParseTreeContent.greedyClosure ← [greedyClosure[p]]];
plusToken =>
p ← NEW[ParseTreeContent.concat ← [concat[LIST[p, NEW[ParseTreeContent.closure ← [closure[p]]]]]]];
greedyPlusToken =>
p ← NEW[ParseTreeContent.concat ← [concat[LIST[p, NEW[ParseTreeContent.greedyClosure ← [greedyClosure[p]]]]]]];
powerToken => {
l: LIST OF ParseTree ← NIL;
iterations: Index ← ParseNumber[FALSE];
WHILE iterations > 0 DO
l ← CONS[p, l];
iterations ← iterations - 1;
ENDLOOP;
p ← NEW[ParseTreeContent.concat ← [concat[l]]];
};
ENDCASE => UnReadToken[];
};
IF p.type = concat
THEN {
pp: REF ParseTreeContent.concat ← NARROW[p];
FOR ll: LIST OF ParseTree ← pp.concats, ll.rest UNTIL ll = NIL DO
l ← CONS[ll.first, l];
ENDLOOP;
}
ELSE
l ← CONS[p, l];
IF l.first.type = closure
THEN {
q: REF ParseTreeContent.closure ← NARROW[l.first];
IF q.p.type = closure OR q.p.type = greedyClosure THEN
l.first ← q.p;
}
ELSE IF l.first.type = greedyClosure THEN {
q: REF ParseTreeContent.greedyClosure ← NARROW[l.first];
IF q.p.type = closure THEN {
qq: REF ParseTreeContent.closure ← NARROW[q.p];
q.p ← qq.p;
}
ELSE IF q.p.type = greedyClosure THEN
l.first ← q.p;
};
ENDLOOP;
SELECT TRUE FROM
l = NIL => RETURN[NIL];
l.rest = NIL => RETURN[l.first];
ENDCASE => TRUSTED {
RETURN[NEW[ParseTreeContent.concat ← [concat[LOOPHOLE[List.Reverse[LOOPHOLE[l]]]]]]];
};
};
A simple A-list symbol table that maps field names to numbers.
The top level syntax.
T ::=
<name:P>T  Named portions. Valid only at top level. Reserved name ALL.
<name,number:P>T Bound the number of CR's matched by P to number.
PT  Concatenation.
P  Or just a pattern.
The tipity-top level syntax.
TT ::=
T
T{T}T  Delimits the virtual start and end of the text matched by <ALL>.
ParseTopLevel: PROC [] RETURNS [l: LIST OF ParseTree ← NIL, nameList: SimpleSymbolTable ← NIL, numberFields: Index ← 0] = {
seenBeginAllToken, seenEndAllToken: BOOLFALSE;
DO
c: CHAR ← GetToken[];
SELECT c FROM
beginFieldToken => {
field: ParseTree ← NIL;
[field, nameList, numberFields] ← ParseField[nameList, numberFields];
l ← CONS[field, l];
};
endFieldToken => SyntaxError[notInsideField];
beginAllToken => {
IF seenBeginAllToken THEN SyntaxError[moreThanOneBeginAll];
seenBeginAllToken ← TRUE;
l ← CONS[NEW[ParseTreeContent.beginALL], l];
};
endAllToken => {
IF ~seenBeginAllToken OR seenEndAllToken THEN
SyntaxError[noMatchingBeginAll];
seenEndAllToken ← TRUE;
l ← CONS[NEW[ParseTreeContent.endALL], l];
};
endPatternToken => {
IF ~seenBeginAllToken THEN
l ← CONS[NEW[ParseTreeContent.endALL], l];
IF seenBeginAllToken AND ~seenEndAllToken THEN
SyntaxError[noClosingEndAll];
l ← CONS[NEW[ParseTreeContent.endAll], l];
EXIT;
};
ENDCASE => {
p: ParseTree;
UnReadToken[];
p ← ParseP[];
IF p = NIL THEN SyntaxError[illegalCharacter];
WITH p SELECT FROM
z: REF ParseTreeContent.concat => TRUSTED {
l ← LOOPHOLE[List.Nconc[List.Reverse[LOOPHOLE[z.concats]], LOOPHOLE[l]]];
};
ENDCASE => l ← CONS[p, l];
};
ENDLOOP;
TRUSTED {l ← LOOPHOLE[List.Reverse[LOOPHOLE[l]]]};
IF ~seenBeginAllToken THEN
l ← CONS[NEW[ParseTreeContent.beginALL], l];
l ← CONS[NEW[ParseTreeContent.beginAll], l];
};
ParseField: PROC [names: SimpleSymbolTable, number: Index] RETURNS [field: ParseTree, newNames: SimpleSymbolTable, newNumber: Index] = {
This routine parses a field, which is of the form <name,bound:pattern>. The name is a sequence of alphabetic characters; the bound is a non-negative integer, and the pattern is a P. The bound and pattern are optional. By default, there is no bound and the pattern is #* with whatever looks the name began with.
s: ParseTree ← NIL;
name: ROPENIL;
c: CHAR ← Ascii.Lower[GetToken[]];
nameLooks: TextLooks.Looks ← looksRead;
bound: INT ← -1;
newNames ← names;
DO
IF ~(c IN ['a..'z]) THEN EXIT;
name ← Rope.Concat[name, Rope.FromChar[c]];
c ← Ascii.Lower[GetToken[]];
ENDLOOP;
IF name = NIL THEN SyntaxError[nameMustBeAString];
IF Rope.Equal[name, "all", FALSE] THEN
SyntaxError[theAllNameIsReserved];
FOR l: SimpleSymbolTable ← names, l.rest UNTIL l = NIL DO
IF Rope.Equal[name, l.first.name, FALSE] THEN {
IF c # endFieldToken THEN
SyntaxError[secondOccurenceOfFieldMustNotContainPattern];
IF ignoreCase THEN
field ← NEW[ParseTreeContent.fieldEqualsIC ← [fieldEqualsIC[l.first.number]]]
ELSE
field ← NEW[ParseTreeContent.fieldEquals ← [fieldEquals[l.first.number]]];
RETURN[field, names, number];
};
ENDLOOP;
newNumber ← number + 1;
newNames ← CONS[NEW[SimpleSymbolTableEntry ← [name, newNumber]], newNames];
IF c = boundSepToken THEN
bound ← ParseNumber[FALSE];
SELECT c FROM
fieldSepToken => {
s ← ParseP[];
IF GetToken[] # endFieldToken THEN SyntaxError[expectedEndOfField];
};
endFieldToken => {
s ← NEW[ParseTreeContent.closure ← [closure[NEW[ParseTreeContent.anyChar ← [anyChar[nameLooks]]]]]];
};
ENDCASE => SyntaxError[expectedEndOfField];
field ← NEW[ParseTreeContent.field ← [field[newNumber, bound, s]]];
};
ParseNumber: PROC [octal: BOOL] RETURNS [number: Index ← 0] = {
c: CHAR;
IF octal
THEN
WHILE (c ← GetToken[]) IN ['0..'7] DO
number ← 8*number + c - '0;
ENDLOOP
ELSE
WHILE (c ← GetToken[]) IN ['0..'9] DO
number ← 10*number + c - '0;
ENDLOOP;
IF c # '. THEN UnReadToken[];
};
IF addBounds THEN patternRope ← Rope.Concat["^", Rope.Concat[patternRope, "$"]];
pEnd ← MIN[Rope.Size[patternRope], patternStart+patternLen];
patternStart ← MIN[patternStart,pEnd];
pPos ← patternStart;
finder ← NEW[FinderRec];
IF word THEN finder.wordSearch ← TRUE;
so Try will know to make sure don't have adjacent alphanumerics
finder.ropeReader ← RopeReader.Create[];
RopeReader.SetPosition[finder.ropeReader, patternRope, patternStart];
IF patternRuns # NIL AND ~ignoreLooks
THEN {
finder.lksReader ← LooksReader.Create[];
LooksReader.SetPosition[finder.lksReader, patternRuns, patternStart]
}
ELSE
finder.lksReader ← NIL;
[parsedPatternList, nameList, numberOfFields] ← ParseTopLevel[];
finder.nameArray ← NEW[NameArray[numberOfFields+1]];
FOR l: SimpleSymbolTable ← nameList, l.rest UNTIL l = NIL DO
finder.nameArray[l.first.number].name ← l.first.name;
ENDLOOP;
forwardPattern ← NEW[ParseTreeContent.concat ←
[concat[CONS[NEW[ParseTreeContent.closure ←
[closure[NEW[ParseTreeContent.anyChar ← [anyChar[IgnoreLooks]]]]]], parsedPatternList]]]];
[forwardPattern, charClassList, numCharClasses] ← RegularExpression.OptimizeForwardSearch[forwardPattern, charClassList, numCharClasses];
TRUSTED {
backwardPattern ← NEW[ParseTreeContent.concat ←
[concat[LOOPHOLE[List.Append[
LOOPHOLE[parsedPatternList],
LOOPHOLE[LIST[NEW[ParseTreeContent.closure ←
[closure[NEW[ParseTreeContent.anyChar ← [anyChar[IgnoreLooks]]]]]]]]]]]]];
};
[backwardPattern, charClassList, numCharClasses] ← RegularExpression.OptimizeBackwardSearch[backwardPattern, charClassList, numCharClasses];
finder.classes ← NEW[ClassArray[numCharClasses]];
FOR l: LIST OF ParseTree ← charClassList, l.rest UNTIL l = NIL DO
WITH l.first SELECT FROM
x: REF ParseTreeContent.class => finder.classes[x.classNumber] ← x.class;
x: REF ParseTreeContent.skipOverClass => finder.classes[x.classNumber] ← x.class;
ENDCASE => ERROR;
ENDLOOP;
finder.stack ← NEW[StackContent ← [0, NEW[PatternStackArray[100]], NEW[TextStackArray[100]], NEW[ReturnCodeArray[100]]]]; -- Interim hack.
finder.forwardProgram ← RegularExpression.Compile[forwardPattern];
finder.backwardProgram ← RegularExpression.Compile[backwardPattern];
finder.wordSearch ← word;
};
}.