<> <> <> <> DIRECTORY TextLooks USING [Looks, Runs], RunReader USING [Ref], LooksReader USING [Ref], TextNode USING [Offset, RefTextNode], Rope USING [ROPE], RopeReader USING [Ref]; RegularExpression: CEDAR DEFINITIONS = { <> MalformedPattern: ERROR [ec: PatternErrorCode]; PatternErrorCode: TYPE = { toobig, -- pattern too long endquote, -- pattern ends with ' endtilda, -- pattern ends with ~ boundary, -- pattern has | inside rather than at beginning or end missingNameEnd, -- pattern has < without matching > unmatchedNameEnd -- pattern has > without previous < }; RefTextNode: TYPE = TextNode.RefTextNode; <> <<>> Create: PROC [pattern: RefTextNode, literal, word, ignoreLooks, ignoreCase, addBounds: BOOL _ FALSE, patternStart: INT _ 0, patternLen: INT _ LAST[INT]] RETURNS [finder: Finder]; <> <> <> <> <> <> <> CreateFromRope: PROC [pattern: ROPE, literal, word, ignoreCase, addBounds: BOOL _ FALSE, patternStart: INT _ 0, patternLen: INT _ LAST[INT]] RETURNS [finder: Finder]; NameLooks: PROC [finder: Finder, name: ROPE] RETURNS [looks: TextLooks.Looks]; <> <> NameLoc: PROC [finder: Finder, name: ROPE] RETURNS [at, atEnd: INT]; <> <> <> <<>> SearchRope: PROC [finder: Finder, rope: ROPE, start: INT _ 0, len: INT _ LAST[INT], interrupt: REF BOOL _ NIL] RETURNS [found: BOOL, at, atEnd, before, after: INT]; Try: PROC [finder: Finder, text: RefTextNode, start: INT _ 0, len: INT _ LAST[INT], looksExact: BOOL _ FALSE, interrupt: REF BOOL _ NIL] RETURNS [found: BOOL, at, atEnd, before, after: INT]; <> <> <> <> <> < atEnd if used } in pattern>> <> <> <> SearchRopeBackwards: PROC [finder: Finder, rope: ROPE, start: INT _ 0, len: INT _ LAST[INT], interrupt: REF BOOL _ NIL] RETURNS [found: BOOL, at, atEnd, before, after: INT]; TryBackwards: PROC [finder: Finder, text: RefTextNode, start: INT _ 0, len: INT _ LAST[INT], looksExact: BOOL _ FALSE, interrupt: REF BOOL _ NIL] RETURNS [found: BOOL, at, atEnd, before, after: INT]; <> <> <> Search: PUBLIC PROC [finder: Finder, rope: ROPE, runs: TextLooks.Runs, start: Offset, len: Offset, looksExact: BOOL, interrupt: REF BOOL _ NIL] RETURNS [found: BOOL, at, atEnd, before, after: Offset]; OptimizeForwardSearch: PROC[p: ParseTree, ccl: LIST OF ParseTree, numCcl: Index] RETURNS [np: ParseTree, newCcl: LIST OF ParseTree, newNumCcl: Index]; OptimizeBackwardSearch: PROC[p: ParseTree, ccl: LIST OF ParseTree, numCcl: Index] RETURNS [np: ParseTree, newCcl: LIST OF ParseTree, newNumCcl: Index]; Compile: PROC [p: ParseTree] RETURNS [c: Code]; <> <<>> ROPE: TYPE = Rope.ROPE; Offset: TYPE = TextNode.Offset; CharClassContent: TYPE = PACKED ARRAY LegalCharacters OF BOOL; CharClass: TYPE = REF CharClassContent; Index: TYPE = NAT; IgnoreLooks: TextLooks.Looks = ALL[TRUE]; -- A bogus definition, but it'll work. ParseTypes: TYPE = { concat, char, charIC, class, anyChar, nodeBreak, skipToNodeBreak, beginNode, skipToBeginNode, fail, succeed, noOp, skipOverClass, skipOverChar, skipOverCharIC, skipOver, skipToChar, skipToCharIC, skipToString, skipTo, skipToEnd, skipToBeginning, closure, carefulClosure, greedyClosure, carefulGreedyClosure, alt, field, fieldEquals, fieldEqualsIC, beginAll, endAll, beginALL, endALL}; ParseTreeContent: TYPE = RECORD[ SELECT type: ParseTypes FROM concat => [concats: LIST OF ParseTree], char, charIC => [char: CHAR, looks: TextLooks.Looks], class => [class: CharClass, looks: TextLooks.Looks, classNumber: Index], anyChar => [looks: TextLooks.Looks], nodeBreak => NULL, skipToNodeBreak => [looks: TextLooks.Looks], beginNode => NULL, skipToBeginNode => NULL, fail, succeed, noOp => NULL, skipOverClass => [class: CharClass, looks: TextLooks.Looks, classNumber: Index], skipOverChar, skipOverCharIC => [char: CHAR, looks: TextLooks.Looks], skipOver => [p: ParseTree], skipToChar, skipToCharIC => [char: CHAR, looks: TextLooks.Looks], skipToString => [p: ParseTree], skipTo => [p: ParseTree], skipToEnd, skipToBeginning => NULL, closure, carefulClosure, greedyClosure, carefulGreedyClosure => [p: ParseTree], alt => [alts: LIST OF ParseTree], field => [number: Index, bound: INT, p: ParseTree], fieldEquals, fieldEqualsIC => [number: Index], beginAll, endAll, beginALL, endALL => NULL ENDCASE ]; ParseTree: TYPE = REF ParseTreeContent; EightBit: CHAR = 200C; CharMask: CHAR = 177C; LegalCharacters: TYPE = CHAR[0C..177C]; LegalInputCharacters: TYPE = CHAR[1C..177C]; beginClassToken: CHAR = EightBit + 1; -- [ endClassToken: CHAR = EightBit + 2; -- ] notToken: CHAR = EightBit + 3; -- ~ anyToken: CHAR = EightBit + 4; -- # nodeBreakToken: CHAR = EightBit + 5; -- $ closureToken: CHAR = EightBit + 6; -- * greedyClosureToken: CHAR = EightBit + 7; -- ** plusToken: CHAR = EightBit + 8; -- + greedyPlusToken: CHAR = EightBit + 9; -- ++ beginAltToken: CHAR = EightBit + 10; -- ( endAltToken: CHAR = EightBit + 11; -- ) altSepToken: CHAR = EightBit + 12; -- | beginFieldToken: CHAR = EightBit + 13; -- < endFieldToken: CHAR = EightBit + 14; -- > fieldSepToken: CHAR = EightBit + 15; -- : boundSepToken: CHAR = EightBit + 16; -- , endPatternToken: CHAR = EightBit + 17; -- end of Pattern. beginAllToken: CHAR = EightBit + 19; -- { endAllToken: CHAR = EightBit + 20; -- } subRangeToken: CHAR = EightBit + 21; -- .. beginNodeToken: CHAR = EightBit +22; -- ^ powerToken: CHAR = EightBit +23; -- ! OpCode: TYPE = { matchChar, matchCharIC, matchCharLooks, matchCharLooksIC, matchClass, matchClassLooks, matchAnyChar, matchAnyCharLooks, matchNodeBreak, skipToNodeBreak, skipToNodeBreakLooks, matchBeginNode, fail, succeed, noOp, skipOverClass, skipOverClassLooks, skipOverChar, skipOverCharLooks, skipOverCharIC, skipOverCharLooksIC, skipToChar, skipToCharLooks, skipToCharIC, skipToCharLooksIC, skipToString, endOfSkipToString, skipToEnd, skipToBeginning, closure, carefulClosure, carefulClosureEnd, greedyClosure, carefulGreedyClosure, carefulGreedyClosureEnd, alt, fieldStart, fieldEnd, boundNodeBreaks, fieldEquals, fieldEqualsLooks, fieldEqualsIC, fieldEqualsLooksIC, beginAll, endAll }; ReturnCode: TYPE = {skipToStringRet, closureRet, carefulClosureRet, greedyClosureRet, carefulGreedyClosureRet, altRet}; PatternPC: TYPE = NAT; TextStackArray: TYPE = RECORD[stack: SEQUENCE length: NAT OF Offset]; PatternStackArray: TYPE = RECORD[stack: SEQUENCE length: NAT OF PatternPC]; PatternOpCodeArray: TYPE = RECORD[opCodes: SEQUENCE length: NAT OF OpCode]; PatternDataArray: TYPE = RECORD[opCodes: SEQUENCE length: NAT OF NAT]; PatternNextArray: TYPE = RECORD[opCodes: SEQUENCE length: NAT OF PatternPC]; PatternLooksArray: TYPE = RECORD[looks: SEQUENCE length: NAT OF TextLooks.Looks]; ClassArray: TYPE = RECORD[classes: SEQUENCE length: NAT OF CharClass]; ReturnCodeArray: TYPE = RECORD[returnCodes: SEQUENCE length: NAT OF ReturnCode]; NameArray: TYPE = RECORD [array: SEQUENCE length: NAT OF NameRec]; NameRec: TYPE = RECORD [name: ROPE, at, atEnd: Offset, looks: TextLooks.Looks]; CodeContent: TYPE = RECORD[ opCodes: REF PatternOpCodeArray, looks: REF PatternLooksArray, data: REF PatternDataArray, next: REF PatternNextArray ]; Code: TYPE = REF CodeContent; StackContent: TYPE = RECORD[ pos: Index, pc: REF PatternStackArray, nextPos: REF TextStackArray, returnCode: REF ReturnCodeArray ]; Stack: TYPE = REF StackContent; FinderRecord: TYPE = RECORD [ stack: Stack, forwardProgram: Code, backwardProgram: Code, classes: REF ClassArray, nameArray: REF NameArray, ropeReader: RopeReader.Ref, lksReader: LooksReader.Ref, runReader: RunReader.Ref, wordSearch: BOOL _ FALSE ]; Finder: TYPE = REF FinderRecord; }.