DIRECTORY NodeReader USING [Ref], TextNode USING [Offset], Rope USING [ROPE], Tioga USING [allLooks, Looks, Node, Runs]; RegularExpression: CEDAR DEFINITIONS = { ROPE: TYPE = Rope.ROPE; Offset: TYPE = TextNode.Offset; CharClassContent: TYPE = PACKED ARRAY LegalCharacters OF BOOL; CharClass: TYPE = REF CharClassContent; Index: TYPE = NAT; IgnoreLooks: Tioga.Looks = Tioga.allLooks; ParseTypes: TYPE = { concat, char, charIC, class, anyChar, nodeBreak, skipToNodeBreak, beginNode, skipToBeginNode, fail, succeed, noOp, skipOverClass, skipOverChar, skipOverCharIC, skipOver, skipToChar, skipToCharIC, skipToString, skipTo, skipToEnd, skipToBeginning, closure, carefulClosure, greedyClosure, carefulGreedyClosure, alt, field, fieldEquals, fieldEqualsIC, beginAll, endAll, beginALL, endALL}; ParseTreeContent: TYPE = RECORD[ SELECT type: ParseTypes FROM concat => [concats: LIST OF ParseTree], char, charIC => [char: CHAR, looks: Tioga.Looks], class => [class: CharClass, looks: Tioga.Looks, classNumber: Index], anyChar => [looks: Tioga.Looks], nodeBreak => NULL, skipToNodeBreak => [looks: Tioga.Looks], beginNode => NULL, skipToBeginNode => NULL, fail, succeed, noOp => NULL, skipOverClass => [class: CharClass, looks: Tioga.Looks, classNumber: Index], skipOverChar, skipOverCharIC => [char: CHAR, looks: Tioga.Looks], skipOver => [p: ParseTree], skipToChar, skipToCharIC => [char: CHAR, looks: Tioga.Looks], skipToString => [p: ParseTree], skipTo => [p: ParseTree], skipToEnd, skipToBeginning => NULL, closure, carefulClosure, greedyClosure, carefulGreedyClosure => [p: ParseTree], alt => [alts: LIST OF ParseTree], field => [number: Index, bound: INT, p: ParseTree], fieldEquals, fieldEqualsIC => [number: Index], beginAll, endAll, beginALL, endALL => NULL ENDCASE ]; ParseTree: TYPE = REF ParseTreeContent; EightBit: CHAR = 200C; CharMask: CHAR = 177C; LegalCharacters: TYPE = CHAR[0C..177C]; LegalInputCharacters: TYPE = CHAR[1C..177C]; beginClassToken: CHAR = EightBit + 1; -- [ endClassToken: CHAR = EightBit + 2; -- ] notToken: CHAR = EightBit + 3; -- ~ anyToken: CHAR = EightBit + 4; -- # nodeBreakToken: CHAR = EightBit + 5; -- $ closureToken: CHAR = EightBit + 6; -- * greedyClosureToken: CHAR = EightBit + 7; -- ** plusToken: CHAR = EightBit + 8; -- + greedyPlusToken: CHAR = EightBit + 9; -- ++ beginAltToken: CHAR = EightBit + 10; -- ( endAltToken: CHAR = EightBit + 11; -- ) altSepToken: CHAR = EightBit + 12; -- | beginFieldToken: CHAR = EightBit + 13; -- < endFieldToken: CHAR = EightBit + 14; -- > fieldSepToken: CHAR = EightBit + 15; -- : boundSepToken: CHAR = EightBit + 16; -- , endPatternToken: CHAR = EightBit + 17; -- end of Pattern. beginAllToken: CHAR = EightBit + 19; -- { endAllToken: CHAR = EightBit + 20; -- } subRangeToken: CHAR = EightBit + 21; -- .. beginNodeToken: CHAR = EightBit +22; -- ­ powerToken: CHAR = EightBit +23; -- ! OpCode: TYPE = { matchChar, matchCharIC, matchCharLooks, matchCharLooksIC, matchClass, matchClassLooks, matchAnyChar, matchAnyCharLooks, matchNodeBreak, skipToNodeBreak, skipToNodeBreakLooks, matchBeginNode, fail, succeed, noOp, skipOverClass, skipOverClassLooks, skipOverChar, skipOverCharLooks, skipOverCharIC, skipOverCharLooksIC, skipToChar, skipToCharLooks, skipToCharIC, skipToCharLooksIC, skipToString, endOfSkipToString, skipToEnd, skipToBeginning, closure, carefulClosure, carefulClosureEnd, greedyClosure, carefulGreedyClosure, carefulGreedyClosureEnd, alt, fieldStart, fieldEnd, boundNodeBreaks, fieldEquals, fieldEqualsLooks, fieldEqualsIC, fieldEqualsLooksIC, beginAll, endAll }; ReturnCode: TYPE = {skipToStringRet, closureRet, carefulClosureRet, greedyClosureRet, carefulGreedyClosureRet, altRet}; PatternPC: TYPE = NAT; TextStackArray: TYPE = RECORD[stack: SEQUENCE length: NAT OF Offset]; PatternStackArray: TYPE = RECORD[stack: SEQUENCE length: NAT OF PatternPC]; PatternOpCodeArray: TYPE = RECORD[opCodes: SEQUENCE length: NAT OF OpCode]; PatternDataArray: TYPE = RECORD[opCodes: SEQUENCE length: NAT OF NAT]; PatternNextArray: TYPE = RECORD[opCodes: SEQUENCE length: NAT OF PatternPC]; PatternLooksArray: TYPE = RECORD[looks: SEQUENCE length: NAT OF Tioga.Looks]; ClassArray: TYPE = RECORD[classes: SEQUENCE length: NAT OF CharClass]; ReturnCodeArray: TYPE = RECORD[returnCodes: SEQUENCE length: NAT OF ReturnCode]; NameArray: TYPE = RECORD [array: SEQUENCE length: NAT OF NameRec]; NameRec: TYPE = RECORD [name: ROPE, at, atEnd: Offset, looks: Tioga.Looks]; CodeContent: TYPE = RECORD[ opCodes: REF PatternOpCodeArray, looks: REF PatternLooksArray, data: REF PatternDataArray, next: REF PatternNextArray ]; Code: TYPE = REF CodeContent; StackContent: TYPE = RECORD[ pos: Index, pc: REF PatternStackArray, nextPos: REF TextStackArray, returnCode: REF ReturnCodeArray ]; Stack: TYPE = REF StackContent; FinderRecord: TYPE = RECORD [ stack: Stack, forwardProgram: Code, backwardProgram: Code, classes: REF ClassArray, nameArray: REF NameArray, nodeReader: NodeReader.Ref, doLooks: BOOL ¬ FALSE, wordSearch: BOOL ¬ FALSE ]; Finder: TYPE = REF FinderRecord; MalformedPattern: ERROR [ec: PatternErrorCode]; PatternErrorCode: TYPE = { toobig, -- pattern too long endquote, -- pattern ends with ' endtilda, -- pattern ends with ~ boundary, -- pattern has | inside rather than at beginning or end missingNameEnd, -- pattern has < without matching > unmatchedNameEnd -- pattern has > without previous < }; Create: PROC [pattern: Tioga.Node, literal, word, ignoreLooks, ignoreCase, addBounds: BOOL ¬ FALSE, patternStart: INT ¬ 0, patternLen: INT ¬ LAST[INT]] RETURNS [finder: Finder]; CreateFromRope: PROC [pattern: ROPE, literal, word, ignoreCase, addBounds: BOOL ¬ FALSE, patternStart: INT ¬ 0, patternLen: INT ¬ LAST[INT]] RETURNS [finder: Finder]; NameLooks: PROC [finder: Finder, name: ROPE] RETURNS [looks: Tioga.Looks]; NameLoc: PROC [finder: Finder, name: ROPE] RETURNS [at, atEnd: INT]; SearchRope: PROC [finder: Finder, rope: ROPE, start: INT ¬ 0, len: INT ¬ LAST[INT], interrupt: REF BOOL ¬ NIL] RETURNS [found: BOOL, at, atEnd, before, after: INT]; Try: PROC [finder: Finder, text: Tioga.Node, start: INT ¬ 0, len: INT ¬ LAST[INT], looksExact: BOOL ¬ FALSE, interrupt: REF BOOL ¬ NIL] RETURNS [found: BOOL, at, atEnd, before, after: INT]; SearchRopeBackwards: PROC [finder: Finder, rope: ROPE, start: INT ¬ 0, len: INT ¬ LAST[INT], interrupt: REF BOOL ¬ NIL] RETURNS [found: BOOL, at, atEnd, before, after: INT]; TryBackwards: PROC [finder: Finder, text: Tioga.Node, start: INT ¬ 0, len: INT ¬ LAST[INT], looksExact: BOOL ¬ FALSE, interrupt: REF BOOL ¬ NIL] RETURNS [found: BOOL, at, atEnd, before, after: INT]; Search: PUBLIC PROC [finder: Finder, rope: ROPE, runs: Tioga.Runs, start: Offset, len: Offset, looksExact: BOOL, interrupt: REF BOOL ¬ NIL] RETURNS [found: BOOL, at, atEnd, before, after: Offset]; OptimizeForwardSearch: PROC[p: ParseTree, ccl: LIST OF ParseTree, numCcl: Index] RETURNS [np: ParseTree, newCcl: LIST OF ParseTree, newNumCcl: Index]; OptimizeBackwardSearch: PROC[p: ParseTree, ccl: LIST OF ParseTree, numCcl: Index] RETURNS [np: ParseTree, newCcl: LIST OF ParseTree, newNumCcl: Index]; Compile: PROC [p: ParseTree] RETURNS [c: Code]; }. t RegularExpression.mesa Copyright Σ 1985, 1986, 1992 by Xerox Corporation. All rights reserved. Nix, December 20, 1983 2:39 pm Russ Atkinson (RRA) April 25, 1985 5:26:16 am PST Willie-s, April 24, 1992 12:54 pm PDT Types Operations & types stolen from TextFind Operations stolen from TextFind, exported from RegExpFindImpl creates a record containing a "compiled" version of the pattern if literal is true, each character in pattern is taken literally if word flag is true, pattern only matches if no adjacent letters or digits if ignoreLooks is true, then ignore the looks of the pattern characters if ignoreCase is false, then alpha chars in pattern must match case otherwise, they can match either upper or lower if addBounds is true, add |'s to both ends of pattern name is the name of a subpattern value are looks that name had in the pattern name is the name of a subpattern value is where that subpattern matched last time Operations stolen from TextFind, exported from RegExpFind2Impl searches text for first match in [start..start+len) i.e., searches up from start until reaches start+len if finds a match, returns with found = true, at = beginning of match or location corresponding to { in pattern atEnd = end of match or location corresponding to } in pattern after = end of entire match, which may be > atEnd if used } in pattern before = start of entire match, which may be < at if used { in pattern if looksExact is true, then match by equality, else by subset if interrupt^ becomes true, will stop searching searches text for last match in [start..start+len) i.e., searches down from start+len until reaches start Operations Κ ₯•NewlineDelimiter –(cedarcode) style™codešœ™Kšœ Οeœ=™HKšœ™K™1K™%—K˜šΟk ˜ Kšœœžœ˜Kšœ žœ ˜Kšœžœžœ˜Kšœžœ˜*K˜—KšΟbœžœž œ˜(K˜™K™Kšžœžœžœ˜Kšœžœ˜Kš œžœžœžœžœžœ˜>Kšœ žœžœ˜'Kšœžœžœ˜K˜KšΟn œ˜*K˜šœ žœ˜Kšœ˜Kšœ˜Kšœ˜K˜Kšœ˜Kšœ7˜7Kšœ0˜0K˜KšœC˜CKšœG˜G—K˜šœžœžœ˜ šžœž˜Kšœžœžœ ˜'Kšœžœ˜1K˜DK˜ Kšœ žœ˜K˜)Kšœ žœ˜Kšœžœ˜Kšœžœ˜K˜LKšœ'žœ˜AKšœ˜Kšœ#žœ˜=Kšœ˜Kšœ˜Kšœžœ˜#KšœO˜OKšœžœžœ ˜"Kšœ!žœ˜4K˜.Kšœ&ž˜*Kšž˜K˜——šœ žœžœ˜'K˜—K˜Kšœ žœ˜Kšœ žœ˜Kšœžœžœ ˜'Kšœžœžœ ˜,K˜KšœžœΟc˜*Kšœžœ‘˜)Kšœ žœ‘˜$Kšœ žœ‘˜$Kšœžœ‘˜)Kšœžœ‘˜(Kšœžœ‘˜.Kšœ žœ‘˜%Kšœžœ‘˜+Kšœžœ‘˜)Kšœ žœ‘˜(Kšœ žœ‘˜(Kšœžœ‘˜+Kšœžœ‘˜)Kšœžœ‘˜*Kšœžœ‘˜)Kšœžœ‘˜9Kšœžœ‘˜)Kšœ žœ‘˜(Kšœžœ‘˜*K˜)Kšœ%Οi‘˜'K˜šœžœ˜Kšœ9˜9K˜K˜ K˜6K˜K˜Kšœ#˜#Kšœ!˜!Kšœ%˜%Kšœ˜Kšœ ˜ K˜ K˜Kšœ,˜,KšœB˜BKšœ&˜&KšœB˜BKšœ˜K˜—šœ žœg˜wK˜—Kšœ žœžœ˜Kš œžœžœžœ žœžœ ˜EKš œžœžœžœ žœžœ ˜KKš œžœžœ žœ žœžœ ˜KKš œžœžœ žœ žœžœžœ˜FKš œžœžœ žœ žœžœ ˜LKš œžœžœžœ žœžœ˜MKš œ žœžœ žœ žœžœ ˜FKš œžœžœžœ žœžœ ˜PKš œ žœžœ žœ žœžœ ˜BKšœ žœžœžœ)˜KK˜šœ žœžœ˜Kšœ žœ˜ Kšœžœ˜Kšœžœ˜Kšœžœ˜K˜—Kšœžœžœ ˜K˜šœžœžœ˜K˜ Kšœžœ˜Kšœ žœ˜Kšœ žœ˜K˜—Kšœžœžœ˜K˜šœžœžœ˜Kšœ ˜ Kšœ˜Kšœ˜Kšœ žœ ˜Kšœ žœ ˜K˜Kšœ žœžœ˜Kšœ žœž˜K˜—šœžœžœ˜ K˜——šœ'™'K˜Kšœžœ˜/šœžœ˜Kšœ‘˜Kšœ ‘˜ Kšœ ‘˜ Kšœ ‘7˜AKšœ‘#˜3Kšœ‘#˜4K˜K˜—K˜—šœ=™=K™š œžœKžœžœžœžœžœžœžœ˜³Kšœ?™?Kšœ@™@KšœK™KKšœG™GšœC™CKšœ/™/—Kšœ5™5K˜—š œžœ žœ)žœžœžœžœžœžœžœ˜¨K˜—š  œžœžœžœ˜JKšœ ™ Kšœ,™,K˜—š  œžœžœžœ žœ˜DKšœ ™ Kšœ0™0K˜——šœ>™>K™š  œžœžœ žœ žœžœžœžœžœžœžœ žœžœ˜₯K˜—š œžœ,žœ žœžœžœžœžœžœžœžœžœ žœžœ˜Ώšœ3™3Kšœ4™4—Kšœ,™,KšœA™AKšœ>™>KšœF™FKšœF™FKšœ=™=Kšœ/™/K˜—š œžœžœ žœ žœžœžœžœžœžœžœ žœžœ˜­K˜—š  œžœ,žœ žœžœžœžœžœžœžœžœžœ žœžœ˜Θšœ2™2Kšœ6™6——K˜—šœ ™ K˜š œžœžœžœ<žœ žœžœžœžœ žœ$˜ΔK˜—š œžœžœžœžœžœžœ˜–K˜—š œžœžœžœžœžœžœ˜—K˜—Kš œžœžœ ˜/K˜—K˜—…—.,G