DIRECTORY Ascii USING [Upper], Basics USING [HighHalf, LongNumber, LowHalf], LooksReader USING [Get, SetPosition, Ref], RegularExpression USING [CharClass, ClassArray, FinderRecord, Index, LegalCharacters, OpCode, PatternDataArray, PatternLooksArray, PatternNextArray, PatternOpCodeArray, PatternStackArray, ReturnCode, ReturnCodeArray, Search, TextStackArray], Rope USING [Fetch, ROPE, Size], RopeEdit USING [GetCharProp], RopeReader USING [Get, GetIndex, Ref, SetPosition], RunReader USING [Ref], TextEdit USING [GetRope, GetRuns], TextLooks USING [Looks, Runs], TextLooksSupport USING [LooksAND], TextNode USING [Offset, RefTextNode]; RegExpFind2Impl: CEDAR PROGRAM IMPORTS Ascii, Basics, LooksReader, RegularExpression, Rope, RopeEdit, RopeReader, TextEdit, TextLooksSupport EXPORTS RegularExpression = { OPEN RegularExpression; Finder: TYPE = REF FinderRec; FinderRec: PUBLIC TYPE = FinderRecord; ROPE: TYPE = Rope.ROPE; Offset: TYPE = TextNode.Offset; RefTextNode: TYPE = TextNode.RefTextNode; SearchRope: PUBLIC PROC [finder: Finder, rope: ROPE, start: Offset, len: Offset, interrupt: REF BOOL] RETURNS [found: BOOL, at, atEnd, before, after: Offset] = { [found, at, atEnd, before, after] _ Search[finder, rope, NIL, start, len, FALSE, interrupt] }; Try: PUBLIC PROC [finder: Finder, text: RefTextNode, start: Offset, len: Offset, looksExact: BOOL, interrupt: REF BOOL] RETURNS [found: BOOL, at, atEnd, before, after: Offset] = { [found, at, atEnd, before, after] _ Search[finder, TextEdit.GetRope[text], TextEdit.GetRuns[text], start, len, looksExact, interrupt]; }; Search: PUBLIC PROC [finder: Finder, rope: ROPE, runs: TextLooks.Runs, start: Offset, len: Offset, looksExact: BOOLEAN, interrupt: REF BOOL _ NIL] RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = { IF finder.wordSearch THEN DO -- repeat search until find a word [found, at, atEnd, before, after] _ TryToFind[finder, rope, runs, start, len, looksExact, interrupt]; IF ~found OR (interrupt#NIL AND interrupt^) THEN RETURN; -- failed IF IsWord[rope, at, atEnd] THEN RETURN; -- got it start _ after; -- try again ENDLOOP; [found, at, atEnd, before, after] _ TryToFind[finder, rope, runs, start, len, looksExact, interrupt]; }; IsWord: PROC [rope: ROPE, at, atEnd: Offset] RETURNS [BOOLEAN] = { IF at > 0 AND RopeEdit.GetCharProp[Rope.Fetch[rope,at-1]] = alphaNumeric THEN RETURN [FALSE]; IF atEnd < Rope.Size[rope] AND RopeEdit.GetCharProp[Rope.Fetch[rope,atEnd]] = alphaNumeric THEN RETURN [FALSE]; RETURN [TRUE]; }; TryToFind: PROC [finder: Finder, rope: ROPE, runs: TextLooks.Runs, start: Offset, len: Offset, looksExact: BOOLEAN, interrupt: REF BOOL] RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = { Character: TYPE = Index; currentChar: Character; currentCHAR: CHAR; end: Offset _ start+MIN[len, Rope.Size[rope]-start]; nextPos: Offset _ start; NoMoreChars: Character = 666; NoNodeBreakBound: Index = 32177; pc: Index _ 0; nodeBreakBound: Index _ NoNodeBreakBound; opCode: REF PatternOpCodeArray _ finder.forwardProgram.opCodes; looks: REF PatternLooksArray _ finder.forwardProgram.looks; data: REF PatternDataArray _ finder.forwardProgram.data; next: REF PatternNextArray _ finder.forwardProgram.next; pcStack: REF PatternStackArray _ finder.stack.pc; nextPosStack: REF TextStackArray _ finder.stack.nextPos; returnCodeStack: REF ReturnCodeArray _ finder.stack.returnCode; charClass: REF ClassArray _ finder.classes; stackPos: Index _ 0; ropeReader: RopeReader.Ref _ finder.ropeReader; lksReader: LooksReader.Ref _ finder.lksReader; runReader: RunReader.Ref _ finder.runReader; AdvanceChar: PROC [] = { IF nextPos < end THEN { currentChar _ (currentCHAR _ RopeReader.Get[ropeReader]) - 0C; nextPos _ nextPos + 1; } ELSE { currentChar _ NoMoreChars; currentCHAR _ '\000; nextPos _ end + 1; }; }; PushPos: PROC [returnCode: ReturnCode] = { pcStack[stackPos] _ pc; nextPosStack[stackPos] _ nextPos; returnCodeStack[stackPos] _ returnCode; stackPos _ stackPos + 1; }; TestLooks: PROC [] RETURNS [BOOL] = { sourcelks: TextLooks.Looks; IF runs=NIL THEN RETURN [FALSE]; -- pattern has looks and text doesn't IF nextPos NOT IN (start..end] THEN RETURN [FALSE]; -- boundary char has no looks LooksReader.SetPosition[lksReader, runs, nextPos-1]; sourcelks _ LooksReader.Get[lksReader]; RETURN[ looks[pc]=(IF looksExact THEN sourcelks ELSE TextLooksSupport.LooksAND[sourcelks, looks[pc]])]; }; Begin: PROC [] = { IF start < end THEN RopeReader.SetPosition[ropeReader, rope, start]; AdvanceChar[]; }; DoFieldEquals: PROC [ignoreCase: BOOL] RETURNS [BOOL] = { pos: Offset _ finder.nameArray[data[pc]].at; WHILE pos < finder.nameArray[data[pc]].atEnd DO c: CHAR _ rope.Fetch[pos]; IF ignoreCase THEN { IF Ascii.Upper[c] # Ascii.Upper[currentCHAR] THEN RETURN [FALSE]; } ELSE IF c # currentCHAR THEN RETURN [FALSE]; pos _ pos + 1; AdvanceChar[]; ENDLOOP; RETURN[TRUE]; }; DoFieldEqualsLooks: PROC [ignoreCase: BOOL] RETURNS [BOOL] = { pos: Offset _ finder.nameArray[data[pc]].at; IF runs=NIL THEN RETURN [FALSE]; -- pattern has looks and text doesn't WHILE pos < finder.nameArray[data[pc]].atEnd DO c: CHAR _ rope.Fetch[pos]; fieldLooks, nextLooks: TextLooks.Looks; IF ignoreCase THEN { IF Ascii.Upper[c] # Ascii.Upper[currentCHAR] THEN RETURN [FALSE]; } ELSE IF c # currentCHAR THEN RETURN [FALSE]; IF nextPos NOT IN (start..end] THEN RETURN [FALSE]; LooksReader.SetPosition[lksReader, runs, pos]; fieldLooks _ LooksReader.Get[lksReader]; LooksReader.SetPosition[lksReader, runs, nextPos-1]; nextLooks _ LooksReader.Get[lksReader]; IF fieldLooks # (IF looksExact THEN nextLooks ELSE TextLooksSupport.LooksAND[fieldLooks, nextLooks]) THEN RETURN[FALSE]; pos _ pos + 1; AdvanceChar[]; ENDLOOP; RETURN[TRUE]; }; DoCarefulGreedyClosureEnd: PROC [] RETURNS [BOOL] = { lastCarefulNextPos: Basics.LongNumber; lastCarefulNextPos.lowbits _ data[next[data[pc]]+1]; lastCarefulNextPos.highbits _ next[next[data[pc]]+1]; TRUSTED { RETURN[lastCarefulNextPos # LOOPHOLE[nextPos]]; }; }; Begin[]; DO DO { SELECT opCode[pc] FROM matchChar => IF currentChar = data[pc] THEN GO TO advance ELSE GO TO Failure; matchCharIC => IF Ascii.Upper[currentCHAR] = VAL[data[pc]] THEN GO TO advance ELSE GO TO Failure; matchCharLooks => IF currentChar = data[pc] AND TestLooks[] THEN GO TO advance ELSE GO TO Failure; matchCharLooksIC => IF Ascii.Upper[currentCHAR] = VAL[data[pc]] AND TestLooks[] THEN GO TO advance ELSE GO TO Failure; matchClass => IF currentCHAR IN LegalCharacters AND charClass[data[pc]][currentCHAR] THEN GO TO advance ELSE GO TO Failure; matchClassLooks => IF currentCHAR IN LegalCharacters AND charClass[data[pc]][currentCHAR] AND TestLooks[] THEN GO TO advance ELSE GO TO Failure; matchAnyChar => IF currentChar # NoMoreChars THEN GO TO advance ELSE GO TO Failure; matchAnyCharLooks => IF currentChar # NoMoreChars AND TestLooks[] THEN GO TO advance ELSE GO TO Failure; matchNodeBreak => IF ~(nextPos=Rope.Size[rope]+1 AND currentChar=NoMoreChars) THEN GO TO Failure; skipToNodeBreak => { IF end < Rope.Size[rope] THEN GO TO Failure; nextPos _ end; RopeReader.SetPosition[ropeReader, rope, nextPos]; GO TO advance; }; skipToNodeBreakLooks => { IF end < Rope.Size[rope] THEN GO TO Failure; WHILE currentChar # NoMoreChars DO IF ~TestLooks[] THEN GO TO Failure; AdvanceChar[]; ENDLOOP; }; matchBeginNode => IF nextPos # 1 THEN GO TO Failure; fail => GO TO Failure; succeed => GO TO AbsoluteSuccess; noOp => NULL; skipOverClass => { ccl: CharClass _ charClass[data[pc]]; WHILE currentCHAR IN LegalCharacters AND ccl[currentCHAR] DO AdvanceChar[]; ENDLOOP; GO TO failAtEnd; }; skipOverClassLooks => { ccl: CharClass _ charClass[data[pc]]; WHILE currentCHAR IN LegalCharacters AND ccl[currentCHAR] AND TestLooks[] DO AdvanceChar[]; ENDLOOP; GO TO failAtEnd; }; skipOverChar => { c: CHAR _ VAL[data[pc]]; WHILE currentCHAR = c DO AdvanceChar[]; ENDLOOP; GO TO failAtEnd; }; skipOverCharLooks => { c: CHAR _ VAL[data[pc]]; WHILE currentCHAR = c AND TestLooks[] DO AdvanceChar[]; ENDLOOP; GO TO failAtEnd; }; skipOverCharIC => { c: CHAR _ VAL[data[pc]]; WHILE Ascii.Upper[currentCHAR] = c DO AdvanceChar[]; ENDLOOP; GO TO failAtEnd; }; skipOverCharLooksIC => { c: CHAR _ VAL[data[pc]]; WHILE Ascii.Upper[currentCHAR] = c AND TestLooks[] DO AdvanceChar[]; ENDLOOP; GO TO failAtEnd; }; skipToChar => { c: CHAR _ VAL[data[pc]]; UNTIL currentCHAR = c DO IF currentChar = NoMoreChars THEN GO TO Failure; AdvanceChar[]; ENDLOOP; }; skipToCharLooks => { c: CHAR _ VAL[data[pc]]; UNTIL currentCHAR = c AND TestLooks[] DO IF currentChar = NoMoreChars THEN GO TO Failure; AdvanceChar[]; ENDLOOP; }; skipToCharIC => { c: CHAR _ VAL[data[pc]]; UNTIL Ascii.Upper[currentCHAR] = c DO IF currentChar = NoMoreChars THEN GO TO Failure; AdvanceChar[]; ENDLOOP; }; skipToCharLooksIC => { c: CHAR _ VAL[data[pc]]; UNTIL Ascii.Upper[currentCHAR] = c AND TestLooks[] DO IF currentChar = NoMoreChars THEN GO TO Failure; AdvanceChar[]; ENDLOOP; }; skipToString => { PushPos[skipToStringRet]; }; endOfSkipToString => { stackPos _ stackPos-1; nextPos _ nextPosStack[stackPos]-1; RopeReader.SetPosition[ropeReader, rope, nextPos]; GO TO advance; }; skipToEnd => { nextPos _ end; GO TO advance; }; skipToBeginning => { nextPos _ start; GO TO advance; }; closure, greedyClosure, alt => PushPos[closureRet]; carefulClosure => { PushPos[closureRet]; data[data[pc]+1] _ Basics.LowHalf[nextPos]; next[data[pc]+1] _ Basics.HighHalf[nextPos]; }; carefulClosure, carefulGreedyClosure => { PushPos[closureRet]; data[next[pc]+1] _ Basics.LowHalf[nextPos]; next[next[pc]+1] _ Basics.HighHalf[nextPos]; }; carefulClosureEnd => { lastCarefulNextPos: Basics.LongNumber; lastCarefulNextPos.lowbits _ data[data[data[pc]]+1]; lastCarefulNextPos.highbits _ next[data[data[pc]]+1]; IF lastCarefulNextPos = LOOPHOLE[nextPos] THEN GO TO Failure; }; carefulGreedyClosureEnd => IF ~DoCarefulGreedyClosureEnd[] THEN GO TO Failure; fieldStart => finder.nameArray[data[pc]].at _ nextPos-1; fieldEnd => finder.nameArray[data[pc]].atEnd _ MAX[nextPos-1, finder.nameArray[data[pc]].at]; boundNodeBreaks => nodeBreakBound _ data[pc]; fieldEquals => IF ~DoFieldEquals[FALSE] THEN GO TO Failure; fieldEqualsLooks => IF ~DoFieldEqualsLooks[FALSE] THEN GO TO Failure; fieldEqualsIC => IF ~DoFieldEquals[TRUE] THEN GO TO Failure; fieldEqualsLooksIC => IF ~DoFieldEqualsLooks[TRUE] THEN GO TO Failure; beginAll => before _ nextPos-1; endAll => { after _ MAX[nextPos-1, before]; GO TO AbsoluteSuccess; }; ENDCASE => ERROR; EXITS failAtEnd => IF currentChar = NoMoreChars THEN GO TO Failure; advance => AdvanceChar[]; }; pc _ next[pc]; REPEAT Failure => { IF interrupt # NIL AND interrupt^ THEN GO TO AbsoluteFailure; IF stackPos = 0 THEN GO TO AbsoluteFailure; stackPos _ stackPos - 1; SELECT returnCodeStack[stackPos] FROM skipToStringRet => { pc _ pcStack[stackPos]; nextPos _ nextPosStack[stackPos]; }; closureRet, greedyClosureRet, altRet, carefulClosureRet, carefulGreedyClosureRet => { pc _ data[pcStack[stackPos]]; nextPos _ nextPosStack[stackPos]-1; }; ENDCASE => ERROR; RopeReader.SetPosition[ropeReader, rope, nextPos]; AdvanceChar[]; }; ENDLOOP; REPEAT AbsoluteFailure => { found _ FALSE; }; AbsoluteSuccess => { found _ TRUE; at _ finder.nameArray[0].at; atEnd _ finder.nameArray[0].atEnd; }; ENDLOOP; }; SearchRopeBackwards: PUBLIC PROC [finder: Finder, rope: ROPE, start: Offset, len: Offset, interrupt: REF BOOL _ NIL] RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = { [found, at, atEnd, before, after] _ RegularExpression.Search[finder, rope, NIL, start, len, FALSE, interrupt]; }; TryBackwards: PUBLIC PROC [finder: Finder, text: RefTextNode, start: Offset, len: Offset, looksExact: BOOLEAN _ FALSE, interrupt: REF BOOL _ NIL] RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = { [found, at, atEnd, before, after] _ RegularExpression.Search[finder, TextEdit.GetRope[text], TextEdit.GetRuns[text], start, len, looksExact, interrupt]; }; }. άRegExpFind2Impl.mesa Copyright c 1985 by Xerox Corporation. All rights reserved. Nix, December 21, 1983 12:45 pm Russ Atkinson (RRA) April 25, 1985 5:29:29 am PST Peter Kessler September 30, 1986 10:54:52 am PDT Κή˜codešœ™Kšœ Οmœ1™˜>Kšœ˜K˜—šžœ˜Kšœ˜K˜K˜K˜——K˜—š œžœ˜*K˜K˜!K˜'K˜K˜—š  œžœžœžœ˜%K˜Kš žœžœžœžœžœ‘%˜FKš žœ žœžœžœžœžœ‘˜QKšœ4˜4K˜'šžœ˜Kšœ žœ žœ žœ4˜`—Kšœ˜—š œžœ˜Kšžœ žœ1˜DKšœ˜K˜—š   œžœžœžœžœ˜9K˜,šžœ(ž˜/Kšœžœ˜šžœ ˜ šžœ˜Kšžœ+žœžœžœ˜AK˜—šž˜Kšžœžœžœžœ˜'——K˜K˜Kšžœ˜—Kšžœžœ˜ K˜—š  œžœžœžœžœ˜>K˜,Kš žœžœžœžœžœ‘%˜Fšžœ(ž˜/Kšœžœ˜K˜'šžœ ˜ šžœ˜Kšžœ+žœžœžœ˜AK˜—šž˜Kšžœžœžœžœ˜'——Kš žœ žœžœžœžœžœ˜3Kšœ.˜.K˜(Kšœ4˜4K˜'Kšžœžœ žœ žœ3žœžœžœ˜xK˜K˜Kšžœ˜—Kšžœžœ˜ K˜—š œžœžœžœ˜5Kšœ&˜&Kšœ4˜4Kšœ5˜5šžœ˜ Kšžœžœ ˜/K˜—K˜—K˜šž˜šž˜˜šžœ ž˜šœ ˜ Kš žœžœžœžœ ž œ ˜@—šœ˜Kš žœžœ žœžœžœž œ ˜R—šœ˜Kš žœžœ žœžœžœžœ ˜P—šœ˜Kšžœžœ žœ žœžœžœžœ ˜b—šœ˜šžœ žœžœ!˜FKšžœžœžœ˜Kšžœžœžœ ˜——šœ˜šžœ žœžœ"žœ ˜VKšžœžœžœžœ ˜&——˜Kš žœžœžœžœžœ ˜C—˜Kš žœžœ žœžœžœžœ ˜S—˜Kš žœžœžœžœžœ ˜O—šœ˜Kšžœžœžœžœ ˜,Kšœ˜K˜2Kšžœžœ ˜K˜—šœ˜Kšžœžœžœžœ ˜,šžœž˜"Kšžœžœžœžœ ˜#Kšœ˜Kšžœ˜—K˜—Kš œžœ žœžœžœ ˜4Kšœžœžœ ˜Kšœ žœžœ˜!Kšœžœ˜ šœ˜K˜%šžœ žœžœž˜@