-- RegExpFindImpl.Mesa
-- Derived from Paxton's TextFindImpl of Tioga, which was derived from EditFind.Mesa of Laurel 6.
-- Last Edited by: Nix, December 21, 1983 4:22 pm

DIRECTORY
  TextFind,
  Ascii USING [Upper, Lower],
  RegExpFindPrivate USING [FinderRecord, CharClass, Index, CharClassContent, ParseTypes, ParseTreeContent, ParseTree, LegalInputCharacters, beginClassToken, endClassToken, notToken, anyToken, nodeBreakToken, beginNodeToken, powerToken, closureToken, greedyClosureToken, plusToken, greedyPlusToken, beginAltToken, endAltToken, altSepToken, beginFieldToken, endFieldToken, fieldSepToken, boundSepToken, endPatternToken, beginAllToken, endAllToken, subRangeToken, NameArray, IgnoreLooks, ClassArray, StackContent, PatternStackArray, TextStackArray, ReturnCodeArray],
  RegExpFindOptimize USING [OptimizeForwardSearch, OptimizeBackwardSearch],
  RegExpFindCompile USING [Compile],
  TextLooks USING [Looks, noLooks, Runs],
  TextEdit USING [GetRope, GetRuns],
  TextNode USING [pZone, RefTextNode, Offset],
  Rope USING [ROPE, Fetch, Equal, Cat, FromChar, Size],
  RopeReader USING [ReadOffEnd, Create, SetPosition, Get, GetIndex],
  RunReader USING [NoMoreRuns],
  RuntimeError USING [BoundsFault],
  List USING [Reverse, Append, Nconc],
  LooksReader USING [Get, Create, SetPosition];

RegExpFindImpl: CEDAR PROGRAM
  IMPORTS Ascii, TextEdit, LooksReader, RopeReader, TextNode, RunReader, Rope, RegExpFindOptimize, RegExpFindCompile, List, RuntimeError
  EXPORTS TextFind =
{ OPEN RegExpFindPrivate;

  -- Detailed reasons for rejecting a pattern.  They are passed to SyntaxError, which
  -- at present does not forward them to the client (see SyntaxError below).
  RegExpPatternErrorCode: TYPE = {
    tooBig,  -- The pattern is too big.
    illegalCharacter,
    improperAltSeparator,
    notInsideAlt,
    notInsideField,
    moreThanOneBeginAll,
    noMatchingBeginAll,
    nameMustBeAString,
    theAllNameIsReserved,
    secondOccurenceOfFieldMustNotContainPattern,
    expectedEndOfField,
    unexpectedEndOfPattern,
    noClosingEndAll,
    invalidNot,
    illegalOctalNumber,
    unknownAbbreviation
    };

  MalformedPattern: PUBLIC ERROR [ec:TextFind.PatternErrorCode] = CODE;

  MaxLen: Offset = LAST[Offset];

  Finder: TYPE = REF FinderRec;
  FinderRec: PUBLIC TYPE = FinderRecord;
  Offset: TYPE = TextNode.Offset;
  ROPE: TYPE = Rope.ROPE;
  RefTextNode: TYPE = TextNode.RefTextNode;

  -- ***** Operations *****

  -- Returns the [at, atEnd) positions recorded in the finder for the field called name.
  -- The name "all" (any case) selects slot 0 of the name array.  Returns [0, 0] when
  -- finder is NIL, has no name array, or has no field with that name.
  -- Field names are stored lower-cased by ParseField, so the lookup ignores case.
  NameLoc: PUBLIC PROC [finder: Finder, name: ROPE] RETURNS [at, atEnd: Offset] = {
    nameArray: REF NameArray _ IF finder # NIL THEN finder.nameArray ELSE NIL;
    at _ atEnd _ 0;
    IF nameArray = NIL THEN RETURN;
    IF Rope.Equal["all", name, FALSE] THEN RETURN [nameArray[0].at, nameArray[0].atEnd];
    FOR i:NAT IN [0..nameArray.length) DO
      IF Rope.Equal[nameArray[i].name, name, FALSE] THEN
        RETURN [nameArray[i].at, nameArray[i].atEnd];
      ENDLOOP;
    };

  -- Returns the looks recorded in the finder for the field called name, with the same
  -- lookup rules as NameLoc.  Returns TextLooks.noLooks when nothing is found.
  NameLooks: PUBLIC PROC [finder: Finder, name: ROPE] RETURNS [looks: TextLooks.Looks] = {
    nameArray: REF NameArray _ IF finder # NIL THEN finder.nameArray ELSE NIL;
    looks _ TextLooks.noLooks;
    IF nameArray = NIL THEN RETURN;
    IF Rope.Equal["all", name, FALSE] THEN RETURN [nameArray[0].looks];
    FOR i:NAT IN [0..nameArray.length) DO
      IF Rope.Equal[nameArray[i].name, name, FALSE] THEN RETURN [nameArray[i].looks];
      ENDLOOP;
    };

  -- Builds a finder from the rope and the looks runs of a text node.
  Create: PUBLIC PROC [pattern: RefTextNode, literal, word, ignoreLooks, ignoreCase, addBounds: BOOLEAN, patternStart: Offset, patternLen: Offset] RETURNS [finder: Finder] = {
    patternRope: ROPE _ TextEdit.GetRope[pattern];
    patternRuns: TextLooks.Runs _ TextEdit.GetRuns[pattern];
    RETURN [CreateFromParts[patternRope,patternRuns,literal,word,
      ignoreLooks,ignoreCase,addBounds,patternStart,patternLen]];
    };

  -- Builds a finder from a bare rope: there are no runs, and looks are ignored.
  CreateFromRope: PUBLIC PROC [pattern: ROPE, literal, word, ignoreCase, addBounds: BOOLEAN, patternStart: Offset, patternLen: Offset] RETURNS [finder: Finder] = {
    RETURN [CreateFromParts[pattern,NIL,literal,word,TRUE,
      ignoreCase,addBounds,patternStart,patternLen]]
    };

  -- Parses the pattern text in [patternStart, patternStart+patternLen) of patternRope,
  -- builds forward and backward parse trees, has them optimized and compiled, and
  -- returns the resulting finder.
  --   literal      every legal input character stands for itself (no operators).
  --   word         copied into finder.wordSearch.
  --   ignoreLooks  when TRUE (or when patternRuns is NIL) looks are not read from the pattern.
  --   ignoreCase   pattern characters are upper-cased as they are read.
  --   addBounds    wraps the pattern rope as "^" pattern "$" before parsing.
  -- Raises MalformedPattern[toobig] for every syntax error and for BoundsFault.
  CreateFromParts: PROC [patternRope: ROPE, patternRuns: TextLooks.Runs, literal, word, ignoreLooks, ignoreCase, addBounds: BOOLEAN, patternStart: Offset, patternLen: Offset] RETURNS [finder: Finder] = {
    ENABLE RuntimeError.BoundsFault => ERROR MalformedPattern[toobig];

    -- A simple association-list symbol table that maps field names to numbers.
    SimpleSymbolTableEntry: TYPE = RECORD[name: ROPE, number: Index];
    SimpleSymbolTable: TYPE = LIST OF REF SimpleSymbolTableEntry;

    nameList: SimpleSymbolTable;
    numberOfFields: Index;
    parsedPatternList: LIST OF ParseTree;
    forwardPattern, backwardPattern: ParseTree;
    char, patternChar: CHAR _ 377C;
    pEnd, pPos: Offset;
    patternSize: Offset;  -- size of patternRope after the optional bounds were added
    numCharClasses: Index _ 0;
    charClassList: LIST OF ParseTree _ NIL;
    insideNamedPat: BOOLEAN _ FALSE;
    lastPhysicalCharUnread: BOOL _ FALSE;
    theLastPhysicalCharUnread: CHAR;
    inAbbreviation: BOOL _ FALSE;
    abbreviationPos: Offset;
    abbreviation: ROPE;
    lastCharacterRead: CHAR;
    looksRead: TextLooks.Looks _ TextLooks.noLooks;

    -- Returns the next raw character.  Sources, in priority order: the one-character
    -- push-back slot, the abbreviation currently being expanded, the pattern rope.
    -- Updates looksRead when a character is taken from the rope.  At the end of the
    -- pattern returns endPatternToken, or raises a syntax error if ~eofOK.
    GetNextChar: PROC [eofOK: BOOL] RETURNS [c: CHAR] = {
      IF lastPhysicalCharUnread THEN {
        lastPhysicalCharUnread _ FALSE;
        RETURN[theLastPhysicalCharUnread];
        };
      IF inAbbreviation THEN {
        abbreviationPos _ abbreviationPos + 1;
        IF abbreviationPos < abbreviation.Size[] THEN
          c _ abbreviation.Fetch[abbreviationPos]
        ELSE
          inAbbreviation _ FALSE;  -- expansion exhausted: fall through to the rope
        };
      IF ~inAbbreviation THEN {
        IF pPos >= pEnd THEN GOTO gotEnd;
        pPos _ pPos + 1;
        c _ finder.ropeReader.Get[! RopeReader.ReadOffEnd => GOTO gotEnd];
        IF finder.lksReader = NIL THEN
          looksRead _ IgnoreLooks
        ELSE
          looksRead _ LooksReader.Get[finder.lksReader !
            RunReader.NoMoreRuns => {looksRead _ TextLooks.noLooks; CONTINUE }];
        };
      IF ignoreCase THEN c _ Ascii.Upper[c];
      EXITS
        gotEnd => {
          c _ endPatternToken;
          looksRead _ TextLooks.noLooks;
          IF ~eofOK THEN SyntaxError[unexpectedEndOfPattern];
          };
      };

    -- Pushes c back so that the next GetNextChar returns it.  Only one character of
    -- push-back is available.
    UnReadLastPhysicalChar: PROC [c: CHAR] = {
      IF lastPhysicalCharUnread THEN ERROR;
      theLastPhysicalCharUnread _ c;
      lastPhysicalCharUnread _ TRUE;
      };

    -- Predefined patterns, selected in a pattern by a backslash followed by the
    -- entry's character (matched after upper-casing).
    AbbreviationRec: TYPE = RECORD[char: CHAR, abbreviation: ROPE];
    abbreviations: LIST OF AbbreviationRec _ LIST[
      ['A, "([a..zA..Z0..9]++)"],
      ['B, "([ '011..'015]++)"],
      ['D, "([0..9]+.[0..9]**|[0..9]*.[0..9]++|[0..9]++)"],
      ['N, "('015)"],
      ['Q, "(\"[~\"]*\"|``[~'']*''''|`[~'']*'')"],
      ['S, "([~ '011..'015]++)"],
      ['W, "([a..zA..Z]++)"],
      ['^, "['001..'037]"]
      ];

    -- Arranges for GetNextChar to read the expansion of abbreviation c next.
    -- Abbreviations do not nest.  Raises a syntax error for an unknown character.
    SetUpAbbreviation: PROC [c: CHAR] = {
      IF inAbbreviation THEN ERROR;
      c _ Ascii.Upper[c];
      FOR l: LIST OF AbbreviationRec _ abbreviations, l.rest UNTIL l = NIL DO
        IF c = l.first.char THEN {
          inAbbreviation _ TRUE;
          abbreviationPos _ -1;  -- GetNextChar increments before fetching
          abbreviation _ l.first.abbreviation;
          RETURN;
          };
        ENDLOOP;
      SyntaxError[unknownAbbreviation];
      };

    charUnRead: BOOL _ FALSE;

    -- The tokenizer.  Letters and digits are returned as themselves; the special
    -- characters [ ] ~ # $ ^ ! ( ) | < > : , { } * + . are mapped to tokens:
    --   'c     quotes the character c, so that it is returned as an ordinary character.
    --   'nnn   three octal digits denote the character with that code (at most 177 octal).
    --   ** ++  are the greedy forms of * and +, and .. is the sub-range token.
    --   \x     starts the expansion of abbreviation x.
    -- In literal mode every legal input character is returned unchanged.
    GetToken: PROC [] RETURNS [char: CHAR] = {
      IF charUnRead THEN {
        charUnRead _ FALSE;
        RETURN[lastCharacterRead];
        };
      lastCharacterRead _ char _ GetNextChar[TRUE];
      IF literal THEN
        IF char = endPatternToken OR char IN LegalInputCharacters THEN RETURN[char]
        ELSE SyntaxError[illegalCharacter];
      IF char IN ['A..'Z] OR char IN ['a..'z] OR char IN ['0..'9] THEN
        RETURN[char];
      SELECT char FROM
        '[ => char _ beginClassToken;
        '] => char _ endClassToken;
        '~ => char _ notToken;
        '# => char _ anyToken;
        '$ => char _ nodeBreakToken;
        '^ => char _ beginNodeToken;
        '! => char _ powerToken;
        '( => char _ beginAltToken;
        ') => char _ endAltToken;
        '| => char _ altSepToken;
        '< => char _ beginFieldToken;
        '> => char _ endFieldToken;
        ': => char _ fieldSepToken;
        ', => char _ boundSepToken;
        '{ => char _ beginAllToken;
        '} => char _ endAllToken;
        '' => {
          char _ GetNextChar[FALSE];
          IF char IN ['0..'7] THEN {
            c2: CHAR _ GetNextChar[FALSE];
            c3: CHAR _ GetNextChar[FALSE];
            octalIndex: CARDINAL;
            IF ~(c2 IN ['0..'7]) OR ~(c3 IN ['0..'7]) THEN
              SyntaxError[illegalOctalNumber];
            octalIndex _ (char-'0)*64 + (c2-'0)*8+(c3-'0);
            IF octalIndex > 127 THEN SyntaxError[illegalOctalNumber];
            char _ VAL[octalIndex];
            };
          IF ~(char IN LegalInputCharacters) THEN SyntaxError[illegalCharacter];
          };
        '* => {
          c: CHAR _ GetNextChar[TRUE];
          IF c = '* THEN
            char _ greedyClosureToken
          ELSE {
            UnReadLastPhysicalChar[c];
            char _ closureToken;
            };
          };
        '+ => {
          c: CHAR _ GetNextChar[TRUE];
          IF c = '+ THEN
            char _ greedyPlusToken
          ELSE {
            UnReadLastPhysicalChar[c];
            char _ plusToken;
            };
          };
        '. => {
          c: CHAR _ GetNextChar[TRUE];
          IF c = '. THEN
            char _ subRangeToken
          ELSE {
            UnReadLastPhysicalChar[c];
            char _ '.;
            };
          };
        '\\ => {
          c: CHAR _ GetNextChar[FALSE];
          SetUpAbbreviation[c];
          RETURN[GetToken[]];
          };
        endPatternToken => NULL;
        ENDCASE => IF ~(char IN LegalInputCharacters) THEN SyntaxError[illegalCharacter];
      lastCharacterRead _ char;
      RETURN[char];
      };

    -- Makes the next GetToken return the most recently returned token again.
    -- Only one token of push-back is available.
    UnReadToken: PROC[] = {
      IF charUnRead THEN ERROR;
      charUnRead _ TRUE;
      };

    -- Reports a syntax error.  kind is currently ignored: every error is raised as
    -- MalformedPattern[toobig].  The form left in the original source as a comment was
    --   MalformedPattern[kind, MAX[0, IF lastPhysicalCharUnread THEN pPos-2 ELSE pPos-1]];
    -- which does not match the current one-argument declaration of MalformedPattern.
    SyntaxError: PROC[kind: RegExpPatternErrorCode] = {
      ERROR MalformedPattern[toobig];
      };

    -- Parses the character class notation, which in its simplest form is a sequence of
    -- characters between []'s, e.g. [0123456789], which specifies a pattern that will
    -- match any character in the sequence.  Ranges of characters may be specified with
    -- .., e.g. [A..F] is the same as [ABCDEF].  Special characters can and must be
    -- quoted, e.g. ['(')''] will match either a left parenthesis, a right parenthesis,
    -- or a quote.  All characters but those in the class may be matched by using a ~ as
    -- the first symbol in the class, e.g. [~A..Za..z0..9] will match all but the
    -- alphanumeric characters.  If ignoreCase is true, then if the set includes a lower
    -- case 'x, it will be made to also include the upper case 'X, and vice-versa.
    -- The opening bracket has already been consumed by the caller.
    ParseCharClass: PROC [] RETURNS [r: ParseTree] = {
      ccr: REF ParseTreeContent.class _ NEW[ParseTreeContent.class];
      complement: BOOL _ FALSE;
      c, lastChar: CHAR _ beginClassToken;
      charClass: CharClass _ NEW[CharClassContent _ ALL[FALSE]];
      charClassList _ CONS[ccr, charClassList];
      ccr.classNumber _ numCharClasses;
      numCharClasses _ numCharClasses + 1;
      IF GetToken[] = notToken THEN
        complement _ TRUE
      ELSE
        UnReadToken[];
      ccr.looks _ looksRead;
      ccr.class _ charClass;
      r _ ccr;
      WHILE (c _ GetToken[]) # endClassToken DO
        IF c = subRangeToken THEN {
          -- lastChar is not a legal character at the start of the class or right
          -- after another range, so ".." is rejected there.
          IF ~(lastChar IN LegalInputCharacters) THEN SyntaxError[illegalCharacter];
          c _ GetToken[];
          IF ~(c IN LegalInputCharacters) THEN SyntaxError[illegalCharacter];
          FOR x: CHAR IN [lastChar..c] DO
            charClass[x] _ TRUE;
            ENDLOOP;
          lastChar _ subRangeToken;
          }
        ELSE IF ~(c IN LegalInputCharacters) THEN
          SyntaxError[illegalCharacter]
        ELSE {
          lastChar _ c;
          charClass[c] _ TRUE;
          };
        ENDLOOP;
      IF ignoreCase THEN
        FOR x: CHAR IN ['a..'z] DO
          IF charClass[x] THEN
            charClass[x-'a+'A] _ TRUE
          ELSE IF charClass[x-'a+'A] THEN
            charClass[x] _ TRUE;
          ENDLOOP;
      IF complement THEN
        FOR x: CHAR IN LegalInputCharacters DO
          charClass[x] _ ~charClass[x];
          ENDLOOP;
      };

    -- X ::= non-special character    An ordinary character.
    --       'special character       A quoted special character.  (Handled in tokenizer)
    --       'nnn                     A character given by its octal code.  (Handled in tokenizer)
    --       [character class]        Character class notation, A..Z means ASCII interval A..Z
    --       [~character class]       Not the characters in this class.
    --       #                        Any character.
    --       $                        Node break.
    --       ^                        Beginning of node.
    --       \x                       Predefined patterns; x names an entry of the
    --                                abbreviations table.  (Handled in tokenizer)
    -- Returns NIL, with the token pushed back, when the next token starts no X.
    ParseX: PROC [] RETURNS [p: ParseTree _ NIL] = {
      c: CHAR _ GetToken[];
      SELECT c FROM
        IN LegalInputCharacters =>
          IF ignoreCase AND c IN ['A..'Z] THEN
            p _ NEW[ParseTreeContent.charIC _ [charIC[c, looksRead]]]
          ELSE
            p _ NEW[ParseTreeContent.char _ [char[c, looksRead]]];
        beginClassToken =>
          p _ ParseCharClass[];
        anyToken =>
          p _ NEW[ParseTreeContent.anyChar _ [anyChar[looksRead]]];
        nodeBreakToken =>
          p _ NEW[ParseTreeContent.nodeBreak];
        beginNodeToken =>
          p _ NEW[ParseTreeContent.beginNode];
        ENDCASE => UnReadToken[];
      };

    -- Z ::= X              A single-character pattern.
    --       (P|P|...|P)    Alternation.
    -- An empty alternative becomes a noOp node; an alternation with one alternative
    -- is returned as that alternative.
    ParseZ: PROC [] RETURNS [p: ParseTree _ NIL] = {
      c: CHAR _ GetToken[];
      IF c = beginAltToken THEN {
        l: LIST OF ParseTree _ NIL;
        DO
          q: ParseTree _ ParseP[];
          IF q = NIL THEN q _ NEW[ParseTreeContent.noOp];
          l _ CONS[q, l];
          c _ GetToken[];
          IF c = endAltToken THEN
            EXIT
          ELSE IF c # altSepToken THEN SyntaxError[improperAltSeparator];
          ENDLOOP;
        IF l = NIL THEN
          RETURN[NIL]
        ELSE IF l.rest = NIL THEN
          RETURN[l.first]
        ELSE
          TRUSTED {
            RETURN[NEW[ParseTreeContent.alt _ [alt[LOOPHOLE[List.Reverse[LOOPHOLE[l]]]]]]];
            };
        }
      ELSE IF c = endAltToken OR c = altSepToken THEN {
        UnReadToken[];
        RETURN[NIL]
        }
      ELSE {
        UnReadToken[];
        p _ ParseX[];
        };
      };

    -- The pattern matching fragments for full patterns.
    -- P ::= null    The empty pattern.
    --       Z
    --       Z*P     Min-matching closure.
    --       Z**P    Greedy closure.
    --       Z+P     Z followed by Z* (built as a concat of Z and closure of Z).
    --       Z++P    Z followed by Z**.
    --       Z!nP    n copies of Z, n a decimal number.
    --       ~ZP     Deterministically match anything up to but not including P.
    -- A closure applied directly to a closure is collapsed as the list is built.
    ParseP: PROC [] RETURNS [ParseTree] = {
      l: LIST OF ParseTree _ NIL;  -- the pieces parsed so far, most recent first
      p: ParseTree;
      DO
        c: CHAR _ GetToken[];
        IF c = notToken THEN
          p _ NEW[ParseTreeContent.skipTo _ [skipTo[ParseZ[]]]]
        ELSE {
          UnReadToken[];
          p _ ParseZ[];
          IF p = NIL THEN EXIT;
          c _ GetToken[];
          SELECT c FROM
            closureToken =>
              p _ NEW[ParseTreeContent.closure _ [closure[p]]];
            greedyClosureToken =>
              p _ NEW[ParseTreeContent.greedyClosure _ [greedyClosure[p]]];
            plusToken =>
              p _ NEW[ParseTreeContent.concat _ [concat[LIST[p, NEW[ParseTreeContent.closure _ [closure[p]]]]]]];
            greedyPlusToken =>
              p _ NEW[ParseTreeContent.concat _ [concat[LIST[p, NEW[ParseTreeContent.greedyClosure _ [greedyClosure[p]]]]]]];
            powerToken => {
              l: LIST OF ParseTree _ NIL;
              iterations: Index _ ParseNumber[FALSE];
              WHILE iterations > 0 DO
                l _ CONS[p, l];
                iterations _ iterations - 1;
                ENDLOOP;
              p _ NEW[ParseTreeContent.concat _ [concat[l]]];
              };
            ENDCASE => UnReadToken[];
          };
        IF p.type = concat THEN {
          -- Splice the members of a concat into the list instead of nesting it.
          pp: REF ParseTreeContent.concat _ NARROW[p];
          FOR ll: LIST OF ParseTree _ pp.concats, ll.rest UNTIL ll = NIL DO
            l _ CONS[ll.first, l];
            ENDLOOP;
          }
        ELSE
          l _ CONS[p, l];
        -- An empty concat (a power of zero as the first piece) adds nothing, so the
        -- list may still be empty here; there is then no head element to normalize.
        IF l # NIL THEN {
          IF l.first.type = closure THEN {
            q: REF ParseTreeContent.closure _ NARROW[l.first];
            IF q.p.type = closure OR q.p.type = greedyClosure THEN
              l.first _ q.p;
            }
          ELSE IF l.first.type = greedyClosure THEN {
            q: REF ParseTreeContent.greedyClosure _ NARROW[l.first];
            IF q.p.type = closure THEN {
              qq: REF ParseTreeContent.closure _ NARROW[q.p];
              q.p _ qq.p;
              }
            ELSE IF q.p.type = greedyClosure THEN
              l.first _ q.p;
            };
          };
        ENDLOOP;
      IF l = NIL THEN
        RETURN[NIL]
      ELSE IF l.rest = NIL THEN
        RETURN[l.first]
      ELSE
        TRUSTED {
          RETURN[NEW[ParseTreeContent.concat _ [concat[LOOPHOLE[List.Reverse[LOOPHOLE[l]]]]]]];
          };
      };

    -- The top level syntax: a sequence of fields (see ParseField), patterns P, and at
    -- most one { ... } pair.  When the pattern contains no { the beginALL and endALL
    -- nodes are placed at the two ends of the list.  The returned list always starts
    -- with a beginAll node and ends with an endAll node.
    ParseTopLevel: PROC [] RETURNS [l: LIST OF ParseTree _ NIL, nameList: SimpleSymbolTable _ NIL, numberFields: Index _ 0] = {
      seenBeginAllToken, seenEndAllToken: BOOL _ FALSE;
      DO
        c: CHAR _ GetToken[];
        SELECT c FROM
          beginFieldToken => {
            field: ParseTree _ NIL;
            [field, nameList, numberFields] _ ParseField[nameList, numberFields];
            l _ CONS[field, l];
            };
          endFieldToken => SyntaxError[notInsideField];
          beginAllToken => {
            IF seenBeginAllToken THEN SyntaxError[moreThanOneBeginAll];
            seenBeginAllToken _ TRUE;
            l _ CONS[NEW[ParseTreeContent.beginALL], l];
            };
          endAllToken => {
            IF ~seenBeginAllToken OR seenEndAllToken THEN
              SyntaxError[noMatchingBeginAll];
            seenEndAllToken _ TRUE;
            l _ CONS[NEW[ParseTreeContent.endALL], l];
            };
          endPatternToken => {
            IF ~seenBeginAllToken THEN
              l _ CONS[NEW[ParseTreeContent.endALL], l];
            IF seenBeginAllToken AND ~seenEndAllToken THEN
              SyntaxError[noClosingEndAll];
            l _ CONS[NEW[ParseTreeContent.endAll], l];
            EXIT;
            };
          ENDCASE => {
            p: ParseTree;
            UnReadToken[];
            p _ ParseP[];
            IF p = NIL THEN SyntaxError[illegalCharacter];
            WITH p SELECT FROM
              z: REF ParseTreeContent.concat => TRUSTED {
                -- l is kept in reverse order, so the concat's members go in reversed.
                l _ LOOPHOLE[List.Nconc[List.Reverse[LOOPHOLE[z.concats]], LOOPHOLE[l]]];
                };
              ENDCASE => l _ CONS[p, l];
            };
        ENDLOOP;
      TRUSTED {l _ LOOPHOLE[List.Reverse[LOOPHOLE[l]]]};
      IF ~seenBeginAllToken THEN
        l _ CONS[NEW[ParseTreeContent.beginALL], l];
      l _ CONS[NEW[ParseTreeContent.beginAll], l];
      };

    -- Parses a field; the opening < has already been consumed.  Accepted forms:
    --   <name>            first use: the pattern defaults to #* with the looks the name
    --                     began with; later use: must match what the field matched.
    --   <name:P>          the field matches pattern P.
    --   <name,bound>      as <name>, with an integer bound recorded in the field node.
    --   <name,bound:P>    as <name:P>, with the bound.
    -- The name is a sequence of letters, stored lower-cased; "all" is reserved.
    -- Returns the field node together with the updated symbol table and field count.
    ParseField: PROC [names: SimpleSymbolTable, number: Index] RETURNS [field: ParseTree, newNames: SimpleSymbolTable, newNumber: Index] = {
      s: ParseTree _ NIL;
      name: ROPE _ NIL;
      c: CHAR _ Ascii.Lower[GetToken[]];
      nameLooks: TextLooks.Looks _ looksRead;
      bound: INT _ -1;  -- -1 when no bound was given
      newNames _ names;
      DO
        IF ~(c IN ['a..'z]) THEN EXIT;
        name _ Rope.Cat[name,Rope.FromChar[c]];
        c _ Ascii.Lower[GetToken[]];
        ENDLOOP;
      IF name = NIL THEN SyntaxError[nameMustBeAString];
      IF Rope.Equal[name, "all", FALSE] THEN
        SyntaxError[theAllNameIsReserved];
      FOR l: SimpleSymbolTable _ names, l.rest UNTIL l = NIL DO
        IF Rope.Equal[name, l.first.name, FALSE] THEN {
          IF c # endFieldToken THEN
            SyntaxError[secondOccurenceOfFieldMustNotContainPattern];
          IF ignoreCase THEN
            field _ NEW[ParseTreeContent.fieldEqualsIC _ [fieldEqualsIC[l.first.number]]]
          ELSE
            field _ NEW[ParseTreeContent.fieldEquals _ [fieldEquals[l.first.number]]];
          RETURN[field, names, number];
          };
        ENDLOOP;
      newNumber _ number + 1;
      newNames _ CONS[NEW[SimpleSymbolTableEntry _ [name, newNumber]], newNames];
      IF c = boundSepToken THEN {
        bound _ ParseNumber[FALSE];
        -- ParseNumber pushes back the token that ended the number; fetch it so the
        -- tests below see what follows the bound rather than the stale separator.
        c _ GetToken[];
        };
      IF c = fieldSepToken THEN {
        s _ ParseP[];
        IF GetToken[] # endFieldToken THEN SyntaxError[expectedEndOfField];
        }
      ELSE IF c = endFieldToken THEN {
        s _ NEW[ParseTreeContent.closure _ [closure[NEW[ParseTreeContent.anyChar _ [anyChar[nameLooks]]]]]];
        }
      ELSE
        SyntaxError[expectedEndOfField];
      field _ NEW[ParseTreeContent.field _ [field[newNumber, bound, s]]];
      };

    -- Reads a run of octal or decimal digit tokens and returns their value (0 when
    -- there are none).  The token that ends the run is pushed back unless it is a
    -- period, which is consumed.
    ParseNumber: PROC [octal: BOOL] RETURNS [number: Index _ 0] = {
      c: CHAR;
      IF octal THEN
        WHILE (c _ GetToken[]) IN ['0..'7] DO
          number _ 8*number + c - '0;
          ENDLOOP
      ELSE
        WHILE (c _ GetToken[]) IN ['0..'9] DO
          number _ 10*number + c - '0;
          ENDLOOP;
      IF c # '. THEN UnReadToken[];
      };

    -- NOTE(review): when addBounds is set, the added "^" shifts every character of
    -- patternRope by one relative to patternRuns and to the caller's patternStart and
    -- patternLen; confirm that callers only use addBounds with the whole pattern.
    IF addBounds THEN
      patternRope _ Rope.Cat["^", Rope.Cat[patternRope, "$"]];
    -- Same result as MIN[size, patternStart+patternLen], computed without forming the
    -- sum, which can overflow when patternLen is MaxLen and patternStart is positive.
    patternSize _ Rope.Size[patternRope];
    pEnd _ IF patternLen >= patternSize - patternStart THEN patternSize ELSE patternStart+patternLen;
    patternStart _ MIN[patternStart,pEnd];
    pPos _ patternStart;
    finder _ TextNode.pZone.NEW[FinderRec];
    IF word THEN finder.wordSearch _ TRUE;
    finder.ropeReader _ RopeReader.Create[];
    RopeReader.SetPosition[finder.ropeReader, patternRope, patternStart];
    IF patternRuns # NIL AND ~ignoreLooks THEN {
      finder.lksReader _ LooksReader.Create[];
      LooksReader.SetPosition[finder.lksReader, patternRuns, patternStart] }
    ELSE finder.lksReader _ NIL;
    [parsedPatternList, nameList, numberOfFields] _ ParseTopLevel[];
    -- Slot 0 of the name array belongs to "all"; fields are numbered from 1.
    finder.nameArray _ TextNode.pZone.NEW[NameArray[numberOfFields+1]];
    FOR l: SimpleSymbolTable _ nameList, l.rest UNTIL l = NIL DO
      finder.nameArray[l.first.number].name _ l.first.name;
      ENDLOOP;
    -- Forward search: #* (ignoring looks) followed by the parsed pattern.
    forwardPattern _ NEW[ParseTreeContent.concat _ [concat[CONS[NEW[ParseTreeContent.closure _ [closure[NEW[ParseTreeContent.anyChar _ [anyChar[IgnoreLooks]]]]]], parsedPatternList]]]];
    [forwardPattern, charClassList, numCharClasses] _ RegExpFindOptimize.OptimizeForwardSearch[forwardPattern, charClassList, numCharClasses];
    -- Backward search: the parsed pattern followed by #* (ignoring looks).
    TRUSTED {
      backwardPattern _ NEW[ParseTreeContent.concat _ [concat[LOOPHOLE[List.Append[
        LOOPHOLE[parsedPatternList],
        LOOPHOLE[LIST[NEW[ParseTreeContent.closure _ [closure[NEW[ParseTreeContent.anyChar _ [anyChar[IgnoreLooks]]]]]]]]]]]]];
      };
    [backwardPattern, charClassList, numCharClasses] _ RegExpFindOptimize.OptimizeBackwardSearch[backwardPattern, charClassList, numCharClasses];
    finder.classes _ NEW[ClassArray[numCharClasses]];
    FOR l: LIST OF ParseTree _ charClassList, l.rest UNTIL l = NIL DO
      WITH l.first SELECT FROM
        x: REF ParseTreeContent.class => finder.classes[x.classNumber] _ x.class;
        x: REF ParseTreeContent.skipOverClass => finder.classes[x.classNumber] _ x.class;
        ENDCASE => ERROR;
      ENDLOOP;
    -- Interim hack.
    finder.stack _ NEW[StackContent _ [0, NEW[PatternStackArray[100]], NEW[TextStackArray[100]], NEW[ReturnCodeArray[100]]]];
    finder.forwardProgram _ RegExpFindCompile.Compile[forwardPattern];
    finder.backwardProgram _ RegExpFindCompile.Compile[backwardPattern];
    finder.wordSearch _ word;
    };  -- of CreateFromParts

  Start: PUBLIC PROC = {};

  }.
A simple A-list symbol table that maps field names to numbers. The top level syntax. T ::= T Named portions. Valid only at top level. Reserved name ALL. T Bound the number of CR's matched by P to number. PT Concatenation. P Or just a pattern. The tipity-top level syntax. TT ::= T T{T}T Delimits the virtual start and end of the text matched by . This routine parses a field, which is of the form . The name is a sequence of alphabetic characters; the bound is a non-negative integer, and the pattern is a P. The bound and pattern are optional. By default, there is no bound and the pattern is #* with whatever looks the name began with. -- so Try will know to make sure don't have adjacent alphanumerics Κr˜JšœΟc™Jš+™+Jšœ™0J™.J˜šΟk ˜ Icodešœ ˜ Kšœžœ˜Kšœžœš˜±Kšœžœ1˜IKšœžœ ˜"Kšœ žœ˜'Kšœ žœ˜"Kšœ žœ˜,Kšœžœžœ%˜5Kšœ žœ2˜BKšœ žœ˜Kšœ žœ˜!Kšœžœ˜$Kšœ žœ˜-J˜—JšΟbœžœž˜Kšžœ˜†Jšžœ ˜Jšžœ˜šœžœ˜ Jšœ ˜&Jšœ˜Jšœ˜J˜ J˜Jšœ˜Jšœ˜Jšœ˜Jšœ˜Jšœ,˜,Jšœ˜J˜J˜J˜ J˜J˜J˜J˜—J˜J˜Jšœžœžœ"žœ˜EJšœžœ ˜J˜J˜Jšœžœžœ ˜Jšœ žœžœ˜&Jšœžœ˜Jšžœžœžœ˜Jšœ žœ˜)J˜Jš™J˜š Οnœžœžœžœžœ˜QKš œ žœ žœ žœžœžœžœ˜KK˜Kšžœ žœžœžœ˜Kšžœžœžœžœ'˜Tšžœžœžœž˜%šžœ%ž˜+Kšžœ'˜-—Kšžœ˜—K˜K˜—š   œžœžœžœžœ˜XKš œ žœ žœ žœžœžœžœ˜JK˜Kšžœ žœžœžœ˜Kšžœžœžœžœ˜Cšžœžœžœž˜%Kšžœ%žœžœ˜HKšžœ˜—K˜K˜—š  œžœžœKžœ,žœ˜­Kšœ žœ˜.K˜8šžœ7˜=K˜;—K˜K˜K˜—š  œžœžœ žœ(žœ,žœ˜‘šžœžœžœ˜6K˜/—K˜K˜K˜—š  œžœžœRžœ,žœ˜ΙJšžœžœ˜BJšœžœžœžœ˜AJš œžœžœžœžœ˜=Jšœ˜J˜J˜%J˜+Kšœžœ˜K˜Kšœ˜Kšœžœžœ žœ˜'Kšœžœžœ˜ Kšœžœžœ˜%Kšœžœ˜ Kšœžœžœ˜K˜Kšœžœ˜Kšœžœ˜Kšœ/˜/š   œžœ žœžœžœ˜5šžœžœ˜ Kšœžœ˜Kšžœ˜"K˜—šžœžœ˜K˜&šžœ'žœ˜.Kšœ'˜'—šžœ˜Kšœžœ˜—K˜—šžœžœ˜Kšžœžœžœ˜!K˜Kšœ5žœ ˜Bšžœžœžœ˜Kšœ˜—šž˜šœG˜GKšœ žœ˜,——K˜—Kšžœ žœ˜&šž˜˜ K˜K˜Kšžœžœ%˜3K˜——K˜—š œžœžœ˜*Kšžœžœžœ˜%Kšœ˜Kšœžœ˜K˜—Kš œžœžœžœžœ˜?šœžœžœžœ˜.Kšœ˜Kšœ˜Kšœ5˜5Kšœ˜K˜,Kšœ˜Kšœ˜K˜K˜—š œžœžœ˜%Kšžœžœžœ˜Kšž˜š žœžœžœ)žœžœž˜Gšžœžœ˜Kšœžœ˜K˜Kšœ$˜$Kšžœ˜K˜—Kšžœ˜—K˜!K˜—Kšœ žœžœ˜š œžœžœžœ˜*šžœ žœ˜Kšœ žœ˜Kšžœ˜K˜—Kšœ'žœ˜.šžœ žœ˜Kš žœžœžœžœžœžœ˜o—š žœžœ žœžœ žœžœ ž˜@Kšžœ˜ —šžœž˜Kšœ˜Kšœ˜Kšœ˜Kšœ˜Kšœ˜K˜K˜Kšœ˜Kšœ˜Kšœ˜Kšœ˜Kšœ˜Kšœ˜Kšœ˜Kšœ˜Kšœ˜šœ˜Kšœžœ˜šžœžœ žœ˜Kšœžœžœ˜Kšœžœžœ˜Kšœ ž ˜š žœžœ žœžœ 
žœ˜/K˜ —Kšœ.˜.Kšžœžœ!˜9Kšœžœ ˜K˜—Kšžœžœžœ˜FK˜—šœ˜Kšœžœžœ˜šžœž˜Kšœ˜—šžœ˜Kšœ˜Kšœ˜K˜—K˜—šœ˜Kšœžœžœ˜šžœž˜Kšœ˜—šžœ˜Kšœ˜Kšœ˜K˜—K˜—˜Kšœžœžœ˜šžœž˜Kšœ˜—šžœ˜Kšœ˜Kšœ ˜ K˜—K˜—˜Kšœžœžœ˜K˜Kšžœ ˜K˜—Kšœžœ˜Kšžœžœžœžœ˜Q—K˜Kšžœ˜ K˜—š  œžœ˜Kšžœ žœžœ˜Kšœ žœ˜K˜—š  œžœ"˜3Kšžœ˜KšœR™RK˜—JšœΚ™Κš œžœžœ˜2Kšœžœžœ˜>Kšœ žœžœ˜Kšœ žœ˜$Kšœžœžœžœ˜:Kšœžœ˜)Kšœ!˜!K˜$šžœž˜Kšœ ž˜—šž˜K˜—Kšœ˜K˜K˜šžœ"ž˜)šžœžœ˜Kšžœ žœžœ˜JKšœ˜Kšžœžœžœ˜Cšžœžœžœž˜Kšœžœ˜Kšžœ˜—Kšœ˜K˜—šžœžœžœž˜)Kšœ˜—šž˜K˜ Kšœžœ˜K˜—Kšžœ˜—šžœ ž˜šžœžœžœ ž˜šžœž˜Kšœž˜—šžœžœž˜Kšœžœ˜—Kšžœ˜——šžœ ž˜šžœžœžœž˜&K˜Kšžœ˜——K˜K˜—šœ™Jšœ™4Jšœ=™=Jšœ žœD™PJš œ.žœžœžœ žœž™LJšœ5™5Jšœ™Jšœ™J™Jšœa™a—š œžœžœžœ˜0Kšœžœ˜šžœž˜ šžœ˜šžœ žœžœ žœ˜%Kšœžœ2˜9—šž˜Kšœžœ/˜6——˜K˜—˜ Kšœžœ2˜9—˜Kšœžœ˜$—˜Kšœžœ˜$—Kšžœ˜—K˜K˜—šœ™Jšœ#™#Jšœ™—š œžœžœžœ˜0Kšœžœ˜šžœžœ˜Kšœžœžœ žœ˜šž˜Kšœ˜Kšœ/˜/Kšœžœ˜K˜šžœž˜Kšž˜—Kšžœžœžœ#˜?Kšžœ˜—šžœžœž˜Kšžœžœ˜ —šžœžœ žœž˜Kšžœ ˜—šž˜šžœ˜ Kšžœžœžœžœ ˜OK˜——K˜—šžœžœžœž˜1K˜Kšžœžœ˜ K˜—šžœ˜K˜K˜ K˜—K˜—J™Jšœ1™1šœ™Jšœ™Jšœ ™ Jšœ™Jšœ™Jšœ ™ Jšœ™JšœD™D—š œžœžœ˜'Kšœžœžœ žœ˜Kšœ ˜ šž˜Kšœžœ˜šžœž˜Kšœžœ.˜5—šžœ˜K˜K˜ Kšžœžœžœžœ˜K˜šžœž˜ šœ˜Kšœžœ*˜1—˜Kšœžœ6˜=—šœ ˜ Kšœžœ#žœžœ.˜c—šœ˜Kšœžœ#žœžœ:˜o—˜Kšœžœžœ žœ˜Kšœ žœ˜'šžœž˜Kšœžœ˜K˜Kšžœ˜—Kšœžœ(˜/K˜—Kšžœ˜—K˜—šžœžœ˜Kšœžœžœ˜,š žœžœžœ!žœžœž˜AKšœžœ˜Kšžœ˜—K˜—šž˜Kšœžœ˜—šžœžœ˜ Kšœžœžœ ˜2šžœžœž˜6K˜—K˜—šžœžœžœ˜+Kšœžœ"žœ ˜8šžœžœ˜Kšœžœžœ˜/K˜ K˜—šžœžœž˜%K˜—K˜—Kšžœ˜—šžœžœž˜Kšžœžœ˜ —šžœžœ žœž˜Kšžœ ˜—šž˜šžœ˜ Kšžœžœ#žœžœ ˜UK˜——K˜K˜—Jšœ>™>J˜Jšœ™šœ™JšœJ™JJšœB™BJšœ™Jšœ™—Jšœ™šœ™Jšœ™JšœJ™J—š   œžœžœžœžœ0žœ˜{Kšœ$žœžœ˜1šž˜Kšœžœ˜šžœž˜ šœ˜Kšœžœ˜K˜EKšœžœ ˜K˜—K˜-˜Kšžœžœ"˜;Kšœžœ˜Kšœžœžœ ˜,K˜—˜šžœžœž˜-Kšœ ˜ —Kšœžœ˜Kšœžœžœ˜*K˜—šœ˜šžœž˜Kšœžœžœ˜*—šžœžœž˜.K˜—Kšœžœžœ˜*Kšžœ˜K˜—šžœ˜ K˜ K˜Kšœ ˜ Kšžœžœžœ˜.šžœžœž˜šœžœžœ˜+Kšœžœžœžœ˜IK˜—Kšžœžœ˜—K˜——Kšžœ˜—Kšžœžœžœ˜2šžœž˜Kšœžœžœ ˜,—Kšœžœžœ ˜,K˜K˜—JšœΊ™Ίš  œžœ+žœF˜ˆKšœžœ˜Kšœžœžœ˜Kšœžœ˜"K˜'Kšœžœ˜K˜šžœ˜Kšžœžœ žœžœ˜K˜'Kšœ˜Kšžœ˜—Kšžœžœžœ ˜2šžœžœž˜&Kšœ#˜#—šžœ&žœžœž˜9šžœ žœžœ˜/šžœž˜Kšœ9˜9—šžœ ž˜KšœžœB˜M—šžœ˜Kšœžœ@˜K—Kšžœ˜K˜—Kšžœ˜—K˜Kšœ žœžœ8˜Kšžœž˜Kšœžœ˜—šžœžœ˜Kšœ ˜ Kšžœžœ!˜CK˜—šžœžœžœ˜ Kšœžœ%žœ5˜dK˜—šž˜Kšœ ˜ —Kšœžœ9˜DK˜—š  œžœ 
žœžœ˜?Kšœžœ˜šžœžœ˜šžœžœ ž˜%Kšœ˜Kšž˜——šž˜šžœžœ ž˜%Kšœ˜Kšžœ˜——šžœž˜K˜—K˜—J˜šžœ žœ˜J˜8J˜—Kšœžœ2˜˜J———Kšœ˜—Kšœ˜Kšœžœ˜1š žœžœžœ#žœžœž˜Ašžœ žœž˜KšœžœC˜IKšœžœK˜QKšžœžœ˜—Kšžœ˜—Kš œžœžœžœžœ˜‹KšœB˜BKšœD˜DK˜Jšžœ˜—J˜Jš œžœžœ˜J˜Jšœ˜J˜—…—DReθ