DIRECTORY TextFind, RegExpFindPrivate USING [FinderRecord, PatternOpCodeArray, PatternLooksArray, PatternDataArray, PatternNextArray, PatternStackArray, TextStackArray, ReturnCodeArray, ReturnCode, ClassArray, CharClass, Index, OpCode], TextLooks USING [Looks, noLooks, Runs], TextEdit USING [GetRope, GetRuns], TextNode USING [RefTextNode, Offset], Rope USING [ROPE, Fetch, Size], Ascii USING [Upper], Basics USING [LongNumber, HighHalf, LowHalf], RopeEdit USING [GetCharProp], RopeReader USING [SetPosition, Get, GetIndex, Ref], RunReader USING [Ref], LooksReader USING [Get, SetPosition, Ref]; RegExpFind2Impl: CEDAR PROGRAM IMPORTS TextEdit, RopeReader, RunReader, LooksReader, RopeEdit, Rope, Ascii, Basics EXPORTS TextFind, RegExpFindPrivate = { OPEN RegExpFindPrivate; Finder: TYPE = REF FinderRec; FinderRec: PUBLIC TYPE = FinderRecord; ROPE: TYPE = Rope.ROPE; Offset: TYPE = TextNode.Offset; RefTextNode: TYPE = TextNode.RefTextNode; SearchRope: PUBLIC PROC [finder: Finder, rope: ROPE, start: Offset, len: Offset, interrupt: REF BOOL] RETURNS [found: BOOL, at, atEnd, before, after: Offset] = { [found, at, atEnd, before, after] _ Search[finder, rope, NIL, start, len, FALSE, interrupt] }; Try: PUBLIC PROC [finder: Finder, text: RefTextNode, start: Offset, len: Offset, looksExact: BOOL, interrupt: REF BOOL] RETURNS [found: BOOL, at, atEnd, before, after: Offset] = { [found, at, atEnd, before, after] _ Search[finder, TextEdit.GetRope[text], TextEdit.GetRuns[text], start, len, looksExact, interrupt]; }; Search: PUBLIC PROC [finder: Finder, rope: ROPE, runs: TextLooks.Runs, start: Offset, len: Offset, looksExact: BOOLEAN, interrupt: REF BOOL _ NIL] RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = { IF finder.wordSearch THEN DO -- repeat search until find a word [found, at, atEnd, before, after] _ TryToFind[finder, rope, runs, start, len, looksExact, interrupt]; IF ~found OR (interrupt#NIL AND interrupt^) THEN RETURN; -- failed IF IsWord[rope, at, atEnd] THEN RETURN; -- got it start _ after; -- try again ENDLOOP; [found, at, atEnd, before, after] _ TryToFind[finder, rope, runs, start, len, looksExact, interrupt]; }; IsWord: PUBLIC PROC [rope: ROPE, at, atEnd: Offset] RETURNS [BOOLEAN] = { IF at > 0 AND RopeEdit.GetCharProp[Rope.Fetch[rope,at-1]] = alphaNumeric THEN RETURN [FALSE]; IF atEnd < Rope.Size[rope] AND RopeEdit.GetCharProp[Rope.Fetch[rope,atEnd]] = alphaNumeric THEN RETURN [FALSE]; RETURN [TRUE]; }; TryToFind: PROC [finder: Finder, rope: ROPE, runs: TextLooks.Runs, start: Offset, len: Offset, looksExact: BOOLEAN, interrupt: REF BOOL] RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = { Character: TYPE = Index; currentChar: Character; currentCHAR: CHAR; end: Offset _ start+MIN[len, Rope.Size[rope]-start]; nextPos: Offset _ start; NoMoreChars: Character = 666; NoNodeBreakBound: Index = 32177; pc: Index _ 0; nodeBreakBound: Index _ NoNodeBreakBound; opCode: REF PatternOpCodeArray _ finder.forwardProgram.opCodes; looks: REF PatternLooksArray _ finder.forwardProgram.looks; data: REF PatternDataArray _ finder.forwardProgram.data; next: REF PatternNextArray _ finder.forwardProgram.next; pcStack: REF PatternStackArray _ finder.stack.pc; nextPosStack: REF TextStackArray _ finder.stack.nextPos; returnCodeStack: REF ReturnCodeArray _ finder.stack.returnCode; charClass: REF ClassArray _ finder.classes; stackPos: Index _ 0; ropeReader: RopeReader.Ref _ finder.ropeReader; lksReader: LooksReader.Ref _ finder.lksReader; runReader: RunReader.Ref _ finder.runReader; AdvanceChar: PROC [] = INLINE { IF nextPos < end THEN { currentChar _ (currentCHAR _ RopeReader.Get[ropeReader]) - 0C; nextPos _ nextPos + 1; } ELSE { currentChar _ NoMoreChars; currentCHAR _ '\000; nextPos _ end + 1; }; }; PushPos: PROC [returnCode: ReturnCode] = INLINE { pcStack[stackPos] _ pc; nextPosStack[stackPos] _ nextPos; returnCodeStack[stackPos] _ returnCode; stackPos _ stackPos + 1; }; TestLooks: PROC [] RETURNS [BOOL] = { sourcelks: TextLooks.Looks; IF runs=NIL THEN RETURN [FALSE]; -- pattern has looks and text doesn't IF nextPos NOT IN (start..end] THEN RETURN [FALSE]; -- boundary char has no looks LooksReader.SetPosition[lksReader, runs, nextPos-1]; sourcelks _ LooksReader.Get[lksReader]; RETURN[ looks[pc]=(IF looksExact THEN sourcelks ELSE TextLooksSupport.LooksAND[sourcelks, looks[pc]])]; }; Begin: PROC [] = { IF start < end THEN RopeReader.SetPosition[ropeReader, rope, start]; AdvanceChar[]; }; DoFieldEquals: PROC [ignoreCase: BOOL] RETURNS [BOOL] = { pos: Offset _ finder.nameArray[data[pc]].at; WHILE pos < finder.nameArray[data[pc]].atEnd DO c: CHAR _ rope.Fetch[pos]; IF ignoreCase THEN { IF Ascii.Upper[c] # Ascii.Upper[currentCHAR] THEN RETURN [FALSE]; } ELSE IF c # currentCHAR THEN RETURN [FALSE]; pos _ pos + 1; AdvanceChar[]; ENDLOOP; RETURN[TRUE]; }; DoFieldEqualsLooks: PROC [ignoreCase: BOOL] RETURNS [BOOL] = { pos: Offset _ finder.nameArray[data[pc]].at; IF runs=NIL THEN RETURN [FALSE]; -- pattern has looks and text doesn't WHILE pos < finder.nameArray[data[pc]].atEnd DO c: CHAR _ rope.Fetch[pos]; fieldLooks, nextLooks: TextLooks.Looks; IF ignoreCase THEN { IF Ascii.Upper[c] # Ascii.Upper[currentCHAR] THEN RETURN [FALSE]; } ELSE IF c # currentCHAR THEN RETURN [FALSE]; IF nextPos NOT IN (start..end] THEN RETURN [FALSE]; LooksReader.SetPosition[lksReader, runs, pos]; fieldLooks _ LooksReader.Get[lksReader]; LooksReader.SetPosition[lksReader, runs, nextPos-1]; nextLooks _ LooksReader.Get[lksReader]; IF fieldLooks # (IF looksExact THEN nextLooks ELSE TextLooksSupport.LooksAND[fieldLooks, nextLooks]) THEN RETURN[FALSE]; pos _ pos + 1; AdvanceChar[]; ENDLOOP; RETURN[TRUE]; }; DoCarefulGreedyClosureEnd: PROC [] RETURNS [BOOL] = { lastCarefulNextPos: Basics.LongNumber; lastCarefulNextPos.lowbits _ data[next[data[pc]]+1]; lastCarefulNextPos.highbits _ next[next[data[pc]]+1]; TRUSTED { RETURN[lastCarefulNextPos # LOOPHOLE[nextPos]]; }; }; Begin[]; DO DO SELECT opCode[pc] FROM matchChar => IF currentChar = data[pc] THEN AdvanceChar[] ELSE GOTO Failure; matchCharIC => IF Ascii.Upper[currentCHAR] = VAL[data[pc]] THEN AdvanceChar[] ELSE GOTO Failure; matchCharLooks => IF currentChar = data[pc] AND TestLooks[] THEN AdvanceChar[] ELSE GOTO Failure; matchCharLooksIC => IF Ascii.Upper[currentCHAR] = VAL[data[pc]] AND TestLooks[] THEN AdvanceChar[] ELSE GOTO Failure; matchClass => IF charClass[data[pc]][currentCHAR] THEN AdvanceChar[] ELSE GOTO Failure; matchClassLooks => IF charClass[data[pc]][currentCHAR] AND TestLooks[] THEN AdvanceChar[] ELSE GOTO Failure; matchAnyChar => IF currentChar # NoMoreChars THEN AdvanceChar[] ELSE GOTO Failure; matchAnyCharLooks => IF currentChar # NoMoreChars AND TestLooks[] THEN AdvanceChar[] ELSE GOTO Failure; matchNodeBreak => IF ~(nextPos=Rope.Size[rope]+1 AND currentChar=NoMoreChars) THEN GOTO Failure; skipToNodeBreak => { IF end < Rope.Size[rope] THEN GOTO Failure; nextPos _ end; RopeReader.SetPosition[ropeReader, rope, nextPos]; AdvanceChar[]; }; skipToNodeBreakLooks => { IF end < Rope.Size[rope] THEN GOTO Failure; WHILE currentChar # NoMoreChars DO IF ~TestLooks[] THEN GOTO Failure; AdvanceChar[]; ENDLOOP; }; matchBeginNode => IF nextPos # 1 THEN GOTO Failure; fail => GOTO Failure; succeed => GOTO AbsoluteSuccess; noOp => NULL; skipOverClass => { ccl: CharClass _ charClass[data[pc]]; WHILE ccl[currentCHAR] DO AdvanceChar[]; ENDLOOP; IF currentChar = NoMoreChars THEN GOTO Failure; }; skipOverClassLooks => { ccl: CharClass _ charClass[data[pc]]; WHILE ccl[currentCHAR] AND TestLooks[] DO AdvanceChar[]; ENDLOOP; IF currentChar = NoMoreChars THEN GOTO Failure; }; skipOverChar => { c: CHAR _ VAL[data[pc]]; WHILE currentCHAR = c DO AdvanceChar[]; ENDLOOP; IF currentChar = NoMoreChars THEN GOTO Failure; }; skipOverCharLooks => { c: CHAR _ VAL[data[pc]]; WHILE currentCHAR = c AND TestLooks[] DO AdvanceChar[]; ENDLOOP; IF currentChar = NoMoreChars THEN GOTO Failure; }; skipOverCharIC => { c: CHAR _ VAL[data[pc]]; WHILE Ascii.Upper[currentCHAR] = c DO AdvanceChar[]; ENDLOOP; IF currentChar = NoMoreChars THEN GOTO Failure; }; skipOverCharLooksIC => { c: CHAR _ VAL[data[pc]]; WHILE Ascii.Upper[currentCHAR] = c AND TestLooks[] DO AdvanceChar[]; ENDLOOP; IF currentChar = NoMoreChars THEN GOTO Failure; }; skipToChar => { c: CHAR _ VAL[data[pc]]; UNTIL currentCHAR = c DO IF currentChar = NoMoreChars THEN GOTO Failure; AdvanceChar[]; ENDLOOP; }; skipToCharLooks => { c: CHAR _ VAL[data[pc]]; UNTIL currentCHAR = c AND TestLooks[] DO IF currentChar = NoMoreChars THEN GOTO Failure; AdvanceChar[]; ENDLOOP; }; skipToCharIC => { c: CHAR _ VAL[data[pc]]; UNTIL Ascii.Upper[currentCHAR] = c DO IF currentChar = NoMoreChars THEN GOTO Failure; AdvanceChar[]; ENDLOOP; }; skipToCharLooksIC => { c: CHAR _ VAL[data[pc]]; UNTIL Ascii.Upper[currentCHAR] = c AND TestLooks[] DO IF currentChar = NoMoreChars THEN GOTO Failure; AdvanceChar[]; ENDLOOP; }; skipToString => { PushPos[skipToStringRet]; }; endOfSkipToString => { stackPos _ stackPos-1; nextPos _ nextPosStack[stackPos]-1; RopeReader.SetPosition[ropeReader, rope, nextPos]; AdvanceChar[]; }; skipToEnd => { nextPos _ end; AdvanceChar[]; }; skipToBeginning => { nextPos _ start; AdvanceChar[]; }; closure, greedyClosure, alt => PushPos[closureRet]; carefulClosure => { PushPos[closureRet]; data[data[pc]+1] _ Basics.LowHalf[nextPos]; next[data[pc]+1] _ Basics.HighHalf[nextPos]; }; carefulClosure, carefulGreedyClosure => { PushPos[closureRet]; data[next[pc]+1] _ Basics.LowHalf[nextPos]; next[next[pc]+1] _ Basics.HighHalf[nextPos]; }; carefulClosureEnd => { lastCarefulNextPos: Basics.LongNumber; lastCarefulNextPos.lowbits _ data[data[data[pc]]+1]; lastCarefulNextPos.highbits _ next[data[data[pc]]+1]; TRUSTED { IF lastCarefulNextPos = LOOPHOLE[nextPos] THEN GOTO Failure; }; }; carefulGreedyClosureEnd => IF ~DoCarefulGreedyClosureEnd[] THEN GOTO Failure; fieldStart => finder.nameArray[data[pc]].at _ nextPos-1; fieldEnd => finder.nameArray[data[pc]].atEnd _ MAX[nextPos-1, finder.nameArray[data[pc]].at]; boundNodeBreaks => nodeBreakBound _ data[pc]; fieldEquals => IF ~DoFieldEquals[FALSE] THEN GOTO Failure; fieldEqualsLooks => IF ~DoFieldEqualsLooks[FALSE] THEN GOTO Failure; fieldEqualsIC => IF ~DoFieldEquals[TRUE] THEN GOTO Failure; fieldEqualsLooksIC => IF ~DoFieldEqualsLooks[TRUE] THEN GOTO Failure; beginAll => { before _ nextPos-1; }; endAll => { after _ MAX[nextPos-1, before]; GOTO AbsoluteSuccess; }; ENDCASE => ERROR; pc _ next[pc]; REPEAT Failure => { IF interrupt # NIL AND interrupt^ THEN GOTO AbsoluteFailure; IF stackPos = 0 THEN GOTO AbsoluteFailure; stackPos _ stackPos - 1; SELECT returnCodeStack[stackPos] FROM skipToStringRet => { pc _ pcStack[stackPos]; nextPos _ nextPosStack[stackPos]; }; closureRet, greedyClosureRet, altRet, carefulClosureRet, carefulGreedyClosureRet => { pc _ data[pcStack[stackPos]]; nextPos _ nextPosStack[stackPos]-1; }; ENDCASE => ERROR; RopeReader.SetPosition[ropeReader, rope, nextPos]; AdvanceChar[]; }; ENDLOOP; REPEAT AbsoluteFailure => { found _ FALSE; }; AbsoluteSuccess => { found _ TRUE; at _ finder.nameArray[0].at; atEnd _ finder.nameArray[0].atEnd; }; ENDLOOP; }; }. æRegExpFind2Impl.mesa Last Edited by: Nix, December 21, 1983 12:45 pm This is not INLINE because we're up against the compiler's code size limits. This is not INLINE because we're up against the compiler's code size limits. Ê ÿ˜Jšœ™J™/J˜šÏk ˜ Jšœ ˜ IcodešœœÁ˜ØKšœ œ˜'Kšœ œ˜"Kšœ œ˜%Kšœœœ˜Kšœœ ˜Kšœœ!˜-Kšœ œ˜Kšœ œ#˜3Kšœ œ˜Kšœ œ˜*K˜—KšÏbœœ˜KšœL˜SKšœ ˜'Kšœ˜K˜Jšœœœ ˜Jšœ œœ˜&Kšœœœ˜Kšœœ˜Kšœ œ˜)K˜šÏn œœœœ)œœœ œ'˜¡Jšœ9œœ ˜[Jšœ˜J˜J˜—šŸœœœMœ œœœ œ'˜³šœ$˜$Kšœb˜b—Jšœ˜—K˜šŸœœœœ@œ œœœœ œ'˜ÑšœœœÏc"˜?J˜eJš œœ œœ œœ  ˜BJšœœœ  ˜1Jšœ  ˜Jšœ˜—šœ#˜#JšœA˜A—Jšœ˜—J˜š Ÿœœœœœœ˜Išœ˜ Jšœ;œœœ˜O—šœ˜Jšœ<œœœ˜P—Jšœœ˜J˜J˜—šŸ œœœ@œ œœœ œ'˜ÇKšœ œ ˜Kšœ˜Kšœ œ˜Kšœœ˜4K˜K˜K˜ K˜Kšœ)˜)Kšœœ4˜?Kšœœ1˜;Kšœœ/˜8Kšœœ/˜8Kšœ œ%˜1Kšœœ'˜8Kšœœ+˜?Kšœ œ˜+K˜K˜/K˜.K˜,šŸ œœœ˜šœœ˜Kšœ>˜>Kšœ˜K˜—šœ˜Kšœ˜K˜K˜K˜—K˜—šŸœœœ˜1K˜K˜!K˜'K˜K˜—šŸ œœœœ˜%J˜Jš œœœœœ %˜FJš œ œœœœœ ˜QJšœ4˜4J˜'šœ˜Kšœ œ œ œ4˜`—Jšœ˜—šŸœœ˜šœ œ˜Kšœ0˜0—Kšœ˜K˜—š Ÿ œœœœœ˜9KšœL™LK˜,šœ(˜/Kšœœ˜šœ œ˜Kšœ+œœœ˜AK˜—š˜Kšœœœœ˜'—K˜K˜Kšœ˜—Kšœœ˜ K˜—š Ÿœœœœœ˜>KšœL™LK˜,Jš œœœœœ %˜Fšœ(˜/Kšœœ˜J˜'šœ œ˜Kšœ+œœœ˜AK˜—š˜Kšœœœœ˜'—Jš œ œœœœœ˜3Jšœ.˜.J˜(Jšœ4˜4J˜'Jšœœ œ œ3œœœ˜xK˜K˜Kšœ˜—Kšœœ˜ K˜—šŸœœœœ˜5Kšœ&˜&Kšœ4˜4Kšœ5˜5šœ˜ Kšœœ ˜/K˜—K˜—K˜š˜š˜šœ ˜šœ ˜ šœ˜K˜ —š˜Kšœ ˜ ——šœ˜šœœ ˜0K˜ —š˜Kšœ ˜ ——šœ˜šœœ ˜.K˜ —š˜Kšœ ˜ ——šœ˜šœœ œ ˜@K˜ —š˜Kšœ ˜ ——šœ˜šœ"˜(K˜ —š˜Kšœ ˜ ——šœ˜šœ"œ ˜8K˜ —š˜Kšœ ˜ ——˜šœ˜!K˜ —š˜Kšœ ˜ ——˜šœœ ˜1K˜ —š˜Kšœ ˜ ——˜Kšœœœœ ˜N—šœ˜Kšœœœ ˜+Kšœ˜K˜2K˜K˜—šœ˜Kšœœœ ˜+šœ˜"Kšœœœ ˜"Kšœ˜Kšœ˜—K˜—˜Kšœ œœ ˜!—˜Kšœ ˜ —šœ ˜ Kšœ˜—˜Kšœ˜—šœ˜K˜%šœ˜K˜Kšœ˜—Kšœœœ ˜/K˜—šœ˜K˜%šœœ ˜)K˜Kšœ˜—Kšœœœ ˜/Kšœ˜—šœ˜Kšœœœ ˜šœ˜K˜Kšœ˜—Kšœœœ ˜/K˜—šœ˜Kšœœœ ˜šœœ ˜(K˜Kšœ˜—Kšœœœ ˜/K˜—šœ˜Kšœœœ ˜šœ˜%K˜Kšœ˜—Kšœœœ ˜/K˜—šœ˜Kšœœœ ˜šœœ ˜5K˜Kšœ˜—Kšœœœ ˜/K˜—šœ˜Kšœœœ ˜šœ˜Kšœœœ ˜/K˜Kšœ˜—K˜—šœ˜Kšœœœ ˜šœœ ˜(Kšœœœ ˜/K˜Kšœ˜—K˜—šœ˜Kšœœœ ˜šœ˜%Kšœœœ ˜/K˜Kšœ˜—K˜—šœ˜Kšœœœ ˜šœœ ˜5Kšœœœ ˜/K˜Kšœ˜—K˜—˜K˜K˜—˜K˜Kšœ#˜#K˜2K˜K˜—˜K˜K˜K˜—˜K˜K˜K˜—šœ˜K˜—šœ˜K˜K˜+K˜,K˜—šœ)˜)K˜K˜+K˜,K˜—šœ˜Kšœ&˜&Kšœ4˜4Kšœ5˜5šœ˜ šœœ ˜.Kšœ ˜ —K˜—K˜—šœ˜Kšœœœ ˜2—šœ ˜ K˜*—šœ ˜ šœ#˜#Kšœ+˜.——šœ˜K˜—Kš œœœœœ ˜:Kš œœœœœ ˜DKš œœœœœ ˜;Kš œœœœœ ˜Ešœ ˜ K˜K˜—šœ ˜ Kšœœ˜Kšœ˜K˜—Kšœœ˜—K˜š˜˜ Kš œ œœ œœ˜