DIRECTORY TextLooks USING [Looks, Runs], RunReader USING [Ref], LooksReader USING [Ref], TextNode USING [Offset, RefTextNode], Rope USING [ROPE], RopeReader USING [Ref]; RegularExpression: CEDAR DEFINITIONS = { MalformedPattern: ERROR [ec: PatternErrorCode]; PatternErrorCode: TYPE = { toobig, -- pattern too long endquote, -- pattern ends with ' endtilda, -- pattern ends with ~ boundary, -- pattern has | inside rather than at beginning or end missingNameEnd, -- pattern has < without matching > unmatchedNameEnd -- pattern has > without previous < }; RefTextNode: TYPE = TextNode.RefTextNode; Create: PROC [pattern: RefTextNode, literal, word, ignoreLooks, ignoreCase, addBounds: BOOL _ FALSE, patternStart: INT _ 0, patternLen: INT _ LAST[INT]] RETURNS [finder: Finder]; CreateFromRope: PROC [pattern: ROPE, literal, word, ignoreCase, addBounds: BOOL _ FALSE, patternStart: INT _ 0, patternLen: INT _ LAST[INT]] RETURNS [finder: Finder]; NameLooks: PROC [finder: Finder, name: ROPE] RETURNS [looks: TextLooks.Looks]; NameLoc: PROC [finder: Finder, name: ROPE] RETURNS [at, atEnd: INT]; SearchRope: PROC [finder: Finder, rope: ROPE, start: INT _ 0, len: INT _ LAST[INT], interrupt: REF BOOL _ NIL] RETURNS [found: BOOL, at, atEnd, before, after: INT]; Try: PROC [finder: Finder, text: RefTextNode, start: INT _ 0, len: INT _ LAST[INT], looksExact: BOOL _ FALSE, interrupt: REF BOOL _ NIL] RETURNS [found: BOOL, at, atEnd, before, after: INT]; SearchRopeBackwards: PROC [finder: Finder, rope: ROPE, start: INT _ 0, len: INT _ LAST[INT], interrupt: REF BOOL _ NIL] RETURNS [found: BOOL, at, atEnd, before, after: INT]; TryBackwards: PROC [finder: Finder, text: RefTextNode, start: INT _ 0, len: INT _ LAST[INT], looksExact: BOOL _ FALSE, interrupt: REF BOOL _ NIL] RETURNS [found: BOOL, at, atEnd, before, after: INT]; Search: PUBLIC PROC [finder: Finder, rope: ROPE, runs: TextLooks.Runs, start: Offset, len: Offset, looksExact: BOOL, interrupt: REF BOOL _ NIL] RETURNS [found: BOOL, at, atEnd, before, after: Offset]; OptimizeForwardSearch: PROC[p: ParseTree, ccl: LIST OF ParseTree, numCcl: Index] RETURNS [np: ParseTree, newCcl: LIST OF ParseTree, newNumCcl: Index]; OptimizeBackwardSearch: PROC[p: ParseTree, ccl: LIST OF ParseTree, numCcl: Index] RETURNS [np: ParseTree, newCcl: LIST OF ParseTree, newNumCcl: Index]; Compile: PROC [p: ParseTree] RETURNS [c: Code]; ROPE: TYPE = Rope.ROPE; Offset: TYPE = TextNode.Offset; CharClassContent: TYPE = PACKED ARRAY LegalCharacters OF BOOL; CharClass: TYPE = REF CharClassContent; Index: TYPE = NAT; IgnoreLooks: TextLooks.Looks = ALL[TRUE]; -- A bogus definition, but it'll work. ParseTypes: TYPE = { concat, char, charIC, class, anyChar, nodeBreak, skipToNodeBreak, beginNode, skipToBeginNode, fail, succeed, noOp, skipOverClass, skipOverChar, skipOverCharIC, skipOver, skipToChar, skipToCharIC, skipToString, skipTo, skipToEnd, skipToBeginning, closure, carefulClosure, greedyClosure, carefulGreedyClosure, alt, field, fieldEquals, fieldEqualsIC, beginAll, endAll, beginALL, endALL}; ParseTreeContent: TYPE = RECORD[ SELECT type: ParseTypes FROM concat => [concats: LIST OF ParseTree], char, charIC => [char: CHAR, looks: TextLooks.Looks], class => [class: CharClass, looks: TextLooks.Looks, classNumber: Index], anyChar => [looks: TextLooks.Looks], nodeBreak => NULL, skipToNodeBreak => [looks: TextLooks.Looks], beginNode => NULL, skipToBeginNode => NULL, fail, succeed, noOp => NULL, skipOverClass => [class: CharClass, looks: TextLooks.Looks, classNumber: Index], skipOverChar, skipOverCharIC => [char: CHAR, looks: TextLooks.Looks], skipOver => [p: ParseTree], skipToChar, skipToCharIC => [char: CHAR, looks: TextLooks.Looks], skipToString => [p: ParseTree], skipTo => [p: ParseTree], skipToEnd, skipToBeginning => NULL, closure, carefulClosure, greedyClosure, carefulGreedyClosure => [p: ParseTree], alt => [alts: LIST OF ParseTree], field => [number: Index, bound: INT, p: ParseTree], fieldEquals, fieldEqualsIC => [number: Index], beginAll, endAll, beginALL, endALL => NULL ENDCASE ]; ParseTree: TYPE = REF ParseTreeContent; EightBit: CHAR = 200C; CharMask: CHAR = 177C; LegalCharacters: TYPE = CHAR[0C..177C]; LegalInputCharacters: TYPE = CHAR[1C..177C]; beginClassToken: CHAR = EightBit + 1; -- [ endClassToken: CHAR = EightBit + 2; -- ] notToken: CHAR = EightBit + 3; -- ~ anyToken: CHAR = EightBit + 4; -- # nodeBreakToken: CHAR = EightBit + 5; -- $ closureToken: CHAR = EightBit + 6; -- * greedyClosureToken: CHAR = EightBit + 7; -- ** plusToken: CHAR = EightBit + 8; -- + greedyPlusToken: CHAR = EightBit + 9; -- ++ beginAltToken: CHAR = EightBit + 10; -- ( endAltToken: CHAR = EightBit + 11; -- ) altSepToken: CHAR = EightBit + 12; -- | beginFieldToken: CHAR = EightBit + 13; -- < endFieldToken: CHAR = EightBit + 14; -- > fieldSepToken: CHAR = EightBit + 15; -- : boundSepToken: CHAR = EightBit + 16; -- , endPatternToken: CHAR = EightBit + 17; -- end of Pattern. beginAllToken: CHAR = EightBit + 19; -- { endAllToken: CHAR = EightBit + 20; -- } subRangeToken: CHAR = EightBit + 21; -- .. beginNodeToken: CHAR = EightBit +22; -- ^ powerToken: CHAR = EightBit +23; -- ! OpCode: TYPE = { matchChar, matchCharIC, matchCharLooks, matchCharLooksIC, matchClass, matchClassLooks, matchAnyChar, matchAnyCharLooks, matchNodeBreak, skipToNodeBreak, skipToNodeBreakLooks, matchBeginNode, fail, succeed, noOp, skipOverClass, skipOverClassLooks, skipOverChar, skipOverCharLooks, skipOverCharIC, skipOverCharLooksIC, skipToChar, skipToCharLooks, skipToCharIC, skipToCharLooksIC, skipToString, endOfSkipToString, skipToEnd, skipToBeginning, closure, carefulClosure, carefulClosureEnd, greedyClosure, carefulGreedyClosure, carefulGreedyClosureEnd, alt, fieldStart, fieldEnd, boundNodeBreaks, fieldEquals, fieldEqualsLooks, fieldEqualsIC, fieldEqualsLooksIC, beginAll, endAll }; ReturnCode: TYPE = {skipToStringRet, closureRet, carefulClosureRet, greedyClosureRet, carefulGreedyClosureRet, altRet}; PatternPC: TYPE = NAT; TextStackArray: TYPE = RECORD[stack: SEQUENCE length: NAT OF Offset]; PatternStackArray: TYPE = RECORD[stack: SEQUENCE length: NAT OF PatternPC]; PatternOpCodeArray: TYPE = RECORD[opCodes: SEQUENCE length: NAT OF OpCode]; PatternDataArray: TYPE = RECORD[opCodes: SEQUENCE length: NAT OF NAT]; PatternNextArray: TYPE = RECORD[opCodes: SEQUENCE length: NAT OF PatternPC]; PatternLooksArray: TYPE = RECORD[looks: SEQUENCE length: NAT OF TextLooks.Looks]; ClassArray: TYPE = RECORD[classes: SEQUENCE length: NAT OF CharClass]; ReturnCodeArray: TYPE = RECORD[returnCodes: SEQUENCE length: NAT OF ReturnCode]; NameArray: TYPE = RECORD [array: SEQUENCE length: NAT OF NameRec]; NameRec: TYPE = RECORD [name: ROPE, at, atEnd: Offset, looks: TextLooks.Looks]; CodeContent: TYPE = RECORD[ opCodes: REF PatternOpCodeArray, looks: REF PatternLooksArray, data: REF PatternDataArray, next: REF PatternNextArray ]; Code: TYPE = REF CodeContent; StackContent: TYPE = RECORD[ pos: Index, pc: REF PatternStackArray, nextPos: REF TextStackArray, returnCode: REF ReturnCodeArray ]; Stack: TYPE = REF StackContent; FinderRecord: TYPE = RECORD [ stack: Stack, forwardProgram: Code, backwardProgram: Code, classes: REF ClassArray, nameArray: REF NameArray, ropeReader: RopeReader.Ref, lksReader: LooksReader.Ref, runReader: RunReader.Ref, wordSearch: BOOL _ FALSE ]; Finder: TYPE = REF FinderRecord; }. @RegularExpression.mesa Copyright c 1985 by Xerox Corporation. All rights reserved. Nix, December 20, 1983 2:39 pm Russ Atkinson (RRA) April 25, 1985 5:26:16 am PST Operations & types stolen from TextFind Operations stolen from TextFind, exported from RegExpFindImpl creates a record containing a "compiled" version of the pattern if literal is true, each character in pattern is taken literally if word flag is true, pattern only matches if no adjacent letters or digits if ignoreLooks is true, then ignore the looks of the pattern characters if ignoreCase is false, then alpha chars in pattern must match case otherwise, they can match either upper or lower if addBounds is true, add |'s to both ends of pattern name is the name of a subpattern value are looks that name had in the pattern name is the name of a subpattern value is where that subpattern matched last time Operations stolen from TextFind, exported from RegExpFind2Impl searches text for first match in [start..start+len) i.e., searches up from start until reaches start+len if finds a match, returns with found = true, at = beginning of match or location corresponding to { in pattern atEnd = end of match or location corresponding to } in pattern after = end of entire match, which may be > atEnd if used } in pattern before = start of entire match, which may be < at if used { in pattern if looksExact is true, then match by equality, else by subset if interrupt^ becomes true, will stop searching searches text for last match in [start..start+len) i.e., searches down from start+len until reaches start Operations Types Ê ®˜codešœ™Kšœ Ïmœ1™™>K™š¡ œžœžœ žœ žœžœžœžœžœžœžœ žœžœ˜¥K˜—š¡œžœ-žœ žœžœžœžœžœžœžœžœžœ žœžœ˜Àšœ3™3Kšœ4™4—Kšœ,™,KšœA™AKšœ>™>KšœF™FKšœF™FKšœ=™=Kšœ/™/K˜—š¡œžœžœ žœ žœžœžœžœžœžœžœ žœžœ˜­K˜—š¡ œžœ-žœ žœžœžœžœžœžœžœžœžœ žœžœ˜Éšœ2™2Kšœ6™6——K˜—šœ ™ K˜š¡œžœžœžœ@žœ žœžœžœžœ žœ$˜ÈK˜—š¡œžœžœžœžœžœžœ˜–K˜—š¡œžœžœžœžœžœžœ˜—K˜—Kš¡œžœžœ ˜/K˜—™K™Kšžœžœžœ˜Kšœžœ˜Kš œžœžœžœžœžœ˜>Kšœ žœžœ˜'Kšœžœžœ˜K˜Kšœžœžœ &˜QK˜šœ žœ˜Kšœ˜Kšœ˜Kšœ˜K˜Kšœ˜Kšœ7˜7Kšœ0˜0K˜KšœC˜CKšœG˜G—K˜šœžœžœ˜ šžœž˜Kšœžœžœ ˜'Kšœžœ˜5KšœH˜HKšœ$˜$Kšœ žœ˜Kšœ-˜-Kšœ žœ˜Kšœžœ˜Kšœžœ˜KšœP˜PKšœ'žœ˜EKšœ˜Kšœ#žœ˜AKšœ˜Kšœ˜Kšœžœ˜#KšœO˜OKšœžœžœ ˜"Kšœ!žœ˜4K˜.Kšœ&ž˜*Kšž˜K˜——šœ žœžœ˜'K˜—K˜Kšœ žœ˜Kšœ žœ˜Kšœžœžœ ˜'Kšœžœžœ ˜,K˜Kšœžœ ˜*Kšœžœ ˜)Kšœ žœ ˜$Kšœ žœ ˜$Kšœžœ ˜)Kšœžœ ˜(Kšœžœ ˜.Kšœ žœ ˜%Kšœžœ ˜+Kšœžœ ˜)Kšœ žœ ˜(Kšœ žœ ˜(Kšœžœ ˜+Kšœžœ ˜)Kšœžœ ˜*Kšœžœ ˜)Kšœžœ ˜9Kšœžœ ˜)Kšœ žœ ˜(Kšœžœ ˜*Kšœ(Ïi˜)Kšœ%¢ ˜'K˜šœžœ˜Kšœ9˜9K˜K˜ K˜6K˜K˜Kšœ#˜#Kšœ!˜!Kšœ%˜%Kšœ˜Kšœ ˜ K˜ K˜Kšœ,˜,KšœB˜BKšœ&˜&KšœB˜BKšœ˜K˜—šœ žœg˜wK˜—Kšœ žœžœ˜Kš œžœžœžœ žœžœ ˜EKš œžœžœžœ žœžœ ˜KKš œžœžœ žœ žœžœ ˜KKš œžœžœ žœ žœžœžœ˜FKš œžœžœ žœ žœžœ ˜LKš œžœžœžœ žœžœ˜QKš œ žœžœ žœ žœžœ ˜FKš œžœžœžœ žœžœ ˜PKš œ žœžœ žœ žœžœ ˜BKšœ žœžœžœ-˜OK˜šœ žœžœ˜Kšœ žœ˜ Kšœžœ˜Kšœžœ˜Kšœžœ˜K˜—Kšœžœžœ ˜K˜šœžœžœ˜K˜ Kšœžœ˜Kšœ žœ˜Kšœ žœ˜K˜—Kšœžœžœ˜K˜šœžœžœ˜Kšœ ˜ Kšœ˜Kšœ˜Kšœ žœ ˜Kšœ žœ ˜K˜K˜K˜Kšœ žœž˜K˜—šœžœžœ˜ K˜——K˜—…—,î