<> <> <> DIRECTORY Basics, TextFind, TextFindPrivate, TextLooks, TextLooksSupport, TextEdit, TextNode, RopeEdit, Rope, RopeReader, RunReader, LooksReader; TextFind3Impl: CEDAR PROGRAM IMPORTS TextFindPrivate, TextEdit, TextLooksSupport, LooksReader, RopeEdit, RopeReader, RunReader, Rope, Basics EXPORTS TextFind = BEGIN OPEN TextFind, TextFindPrivate, RopeEdit; <<-- ***** Operations *****>> Finder: TYPE = REF FinderRec; FinderRec: PUBLIC TYPE = FinderRecord; noMoreChars: SIGNAL = CODE; SearchRopeBackwards: PUBLIC PROC [finder: Finder, rope: Rope.ROPE, start: Offset _ 0, len: Offset _ MaxLen, interrupt: REF BOOL _ NIL] RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = { [found, at, atEnd, before, after] _ SearchBackwards[finder, rope, NIL, start, len, FALSE, interrupt] }; TryBackwards: PUBLIC PROC [finder: Finder, text: RefTextNode, start: Offset _ 0, len: Offset _ MaxLen, looksExact: BOOLEAN _ FALSE, interrupt: REF BOOL _ NIL] RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = { [found, at, atEnd, before, after] _ SearchBackwards[finder, TextEdit.GetRope[text], TextEdit.GetRuns[text], start, len, looksExact, interrupt] }; SearchBackwards: PROC [finder: Finder, rope: ROPE, runs: TextLooks.Runs, start: Offset, len: Offset, looksExact: BOOLEAN, interrupt: REF BOOL _ NIL] RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = { IF finder.wordSearch THEN DO -- repeat search until find a word [found, at, atEnd, before, after] _ TryToFindBackwards[finder, rope, runs, start, len, looksExact]; IF ~found OR (interrupt#NIL AND interrupt^) THEN RETURN; -- failed IF IsWord[rope, at, atEnd] THEN RETURN; -- got it len _ before-start; -- try again ENDLOOP; [found, at, atEnd, before, after] _ TryToFindBackwards[finder, rope, runs, start, len, looksExact, interrupt] }; TryToFindBackwards: PROC [ finder: Finder, rope: ROPE, runs: TextLooks.Runs, start: Offset _ 0, len: Offset _ MaxLen, looksExact: BOOLEAN _ FALSE, interrupt: REF BOOL _ NIL] RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = { OPEN finder; stackPtr, patternPos, patternAnchor, patternFirst: NAT _ 0; char, patternChar: CHAR _ 377C; charType: CharProperty; beginPos, endPos, textPos, textAnchor, end, size: Offset; psLength: NAT; LooksMatch: PROC [txtpos: Offset, ppos: NAT] RETURNS [BOOLEAN] = { patlks, sourcelks: TextLooks.Looks; IF (patlks _ patternLooks[ppos-1]) = TextLooks.noLooks THEN RETURN [TRUE]; IF runs=NIL THEN RETURN [FALSE]; -- pattern has looks and text doesn't IF txtpos NOT IN (start..end] THEN RETURN [FALSE]; -- boundary char has no looks LooksReader.SetPosition[lksReader,runs,txtpos]; sourcelks _ LooksReader.Backwards[lksReader]; RETURN [patlks=(IF looksExact THEN sourcelks ELSE TextLooksSupport.LooksAND[sourcelks,patlks])] }; GetChar: PROC [txtpos: Offset] RETURNS [char: CHAR] = { SELECT txtpos FROM IN (start..end] => { -- read the character from the rope RopeReader.SetPosition[ropeReader,rope,txtpos]; char _ RopeReader.Backwards[ropeReader] }; <<--start => char _ leftBoundaryPattern;>> <<--end+1 => char _ rightBoundaryPattern;>> ENDCASE => SIGNAL noMoreChars }; -- failure return; have run out of characters PropTest: TYPE = { eq, ne, any }; MaxCount: PROC [propTest: PropTest, property: CharProperty _ illegal] RETURNS [count: INT] = { count _ 0; DO char _ GetChar[textPos-count ! noMoreChars => EXIT]; IF propTest=eq THEN IF GetCharProp[char] # property THEN EXIT ELSE NULL ELSE IF propTest=ne AND GetCharProp[char]=property THEN EXIT ELSE NULL; IF patternLooks # NIL AND patternLooks[patternPos] # TextLooks.noLooks AND ~LooksMatch[textPos-count, patternPos] THEN EXIT; count _ count+1; ENDLOOP; }; size _ Rope.Size[rope]; start _ MIN[MAX[0,start],size]; len _ MIN[MAX[0,len],size-start]; end _ start+len; found _ FALSE; atEnd _ end; IF looksOnly THEN { lks: TextLooks.Looks; runLen: Offset; RunReader.SetPosition[runReader,runs,end]; atEnd _ end; WHILE atEnd > start DO IF runs=NIL THEN { runLen _ len; lks _ TextLooks.noLooks } ELSE [runLen,lks] _ RunReader.Backwards[runReader]; IF ~looksExact THEN lks _ TextLooksSupport.LooksAND[lks,looks]; IF lks = looks THEN EXIT; -- have found a match atEnd _ atEnd-runLen; ENDLOOP; IF atEnd <= start THEN RETURN; -- failed to find a match RETURN [TRUE,atEnd-1,atEnd,atEnd-1,atEnd] }; psLength _ length; UNTIL patternFirst = psLength DO -- discard leading "any's" SELECT Pat[patternArray[patternFirst]] FROM anyStringPat, anyAlphaPat, anyNonAlphaPat, anyBlankPat, anyNonBlankPat => NULL; ENDCASE => EXIT; patternFirst _ patternFirst+1; ENDLOOP; IF patternFirst = psLength THEN RETURN [TRUE,end,end,end,end]; -- null pattern DO -- text loop IF lastPatternCharIsNormal THEN { IF lastPatChar1 = rightBoundaryPattern THEN { IF atEnd < size THEN RETURN; -- failed since not at end of node patternPos _ psLength-1; textPos _ atEnd } ELSE { -- search for next instance of last pattern char atEnd _ MIN[end,atEnd]; RopeReader.SetPosition[ropeReader,rope,atEnd]; UNTIL atEnd <= start DO SELECT RopeReader.Backwards[ropeReader] FROM lastPatChar1, lastPatChar2 => IF patternLooks=NIL OR LooksMatch[atEnd,psLength] THEN EXIT; ENDCASE; atEnd _ atEnd-1; ENDLOOP; patternPos _ psLength-1; textPos _ atEnd - 1 }} ELSE { patternPos _ psLength; textPos _ atEnd }; IF atEnd <= start THEN EXIT; stackPtr _ 0; patternAnchor _ psLength; after _ endPos _ textAnchor _ atEnd; DO -- pattern loop IF patternPos <= patternFirst THEN { -- have finished pattern found _ TRUE; textPos _ MAX[start,textPos]; -- in case used initial boundary char in making the match atEnd _ MIN[end,endPos]; -- in case used final boundary char in making the match after _ MIN[after,end]; -- in case used final boundary char in making the match at _ IF leftBracketSeen THEN beginPos ELSE textPos; before _ textPos; GO TO Return }; IF interrupt#NIL AND interrupt^ THEN GO TO Return; WITH p:patternArray[patternPos-1] SELECT FROM startname => { nameArray[p.index].at _ textPos; patternPos _ patternPos-1 }; endname => { nameArray[p.index].atEnd _ textPos; patternPos _ patternPos-1 }; not => { -- check that next character is not the one in this pattern element char _ GetChar[textPos ! noMoreChars => EXIT]; SELECT patternChar _ p.char FROM char => IF patternLooks=NIL OR LooksMatch[textPos,patternPos] THEN EXIT; >= EightBit => { -- check both upper and lower case IF (SELECT patternChar _ CharBits[patternChar] FROM IN ['A..'Z] => patternChar = UpperCase[char], IN ['a ..'z] => patternChar = LowerCase[char], ENDCASE => patternChar = char) AND (patternLooks=NIL OR LooksMatch[textPos,patternPos]) THEN EXIT }; -- chars match ENDCASE; patternPos _ patternPos-1; textPos _ textPos - 1 }; pattern => { SELECT patternChar _ p.char FROM leftBracketPattern => { beginPos _ textPos; patternPos _ patternPos-1 }; rightBracketPattern => { endPos _ textPos; patternPos _ patternPos-1 }; nopPattern => patternPos _ patternPos-1; anyStringPattern => { IF patternLooks # NIL AND patternLooks[patternPos-1] # TextLooks.noLooks THEN { stackPtr _ stackPtr + 1; textPosStack[stackPtr] _ textPos; patternPosStack[stackPtr] _ patternPos } ELSE { textAnchor _ textPos; patternAnchor _ patternPos - 1; stackPtr _ 0 }; patternPos _ patternPos - 1 }; anyNonAlphaPattern, anyAlphaPattern, anyNonBlankPattern, anyBlankPattern => { stackPtr _ stackPtr + 1; textPosStack[stackPtr] _ textPos; patternPosStack[stackPtr] _ patternPos; patternPos _ patternPos - 1 }; maxStringPattern => { IF patternLooks # NIL AND patternLooks[patternPos] # TextLooks.noLooks THEN { stackPtr _ stackPtr + 1; textPosStack[stackPtr] _ textPos; textLenStack[stackPtr] _ MaxCount[any]; patternPosStack[stackPtr] _ patternPos } ELSE { stackPtr _ 1; textPosStack[stackPtr] _ textPos; textLenStack[stackPtr] _ textPos-start; patternPosStack[stackPtr] _ patternPos }; textPos _ textPos - textLenStack[stackPtr]; patternPos _ patternPos - 1 }; maxNonAlphaPattern, maxAlphaPattern, maxNonBlankPattern, maxBlankPattern => { stackPtr _ stackPtr + 1; textPosStack[stackPtr] _ textPos; textLenStack[stackPtr] _ SELECT patternChar FROM maxNonAlphaPattern => MaxCount[ne, alphaNumeric], maxAlphaPattern => MaxCount[eq, alphaNumeric], maxNonBlankPattern => MaxCount[ne, white], maxBlankPattern => MaxCount[eq, white], ENDCASE => ERROR; textPos _ textPos - textLenStack[stackPtr]; patternPosStack[stackPtr] _ patternPos; patternPos _ patternPos - 1 }; ENDCASE => { -- check next character from text boundary: BOOL _ FALSE; char _ GetChar[textPos ! noMoreChars => { char _ leftBoundaryPattern; IF patternChar # leftBoundaryPattern THEN boundary _ TRUE; CONTINUE }]; IF ~boundary AND patternChar = oneCharPattern AND (patternLooks = NIL OR LooksMatch[textPos,patternPos]) THEN { IF patternPos # psLength AND patternPos = patternAnchor THEN { -- first char(s) of * segment patternAnchor _ patternAnchor - 1; textAnchor _ textPos - 1 }; patternPos _ patternPos - 1; textPos _ textPos - 1 } ELSE { IF ~boundary AND (SELECT patternChar FROM char => TRUE, -- this also takes care of leftBoundaryPattern oneNonAlphaPattern => GetCharProp[char] # alphaNumeric, oneAlphaPattern => GetCharProp[char] = alphaNumeric, oneNonBlankPattern => GetCharProp[char] # white, oneBlankPattern => GetCharProp[char] = white, oneCharPattern => FALSE, -- known from above that looks don't match >= EightBit => -- check both upper and lower case SELECT patternChar _ CharBits[patternChar] FROM IN ['A..'Z] => patternChar = UpperCase[char], IN ['a ..'z] => patternChar = LowerCase[char], ENDCASE => patternChar = char, ENDCASE => FALSE) AND (patternLooks=NIL OR LooksMatch[textPos,patternPos]) THEN -- chars match -- { patternPos _ patternPos - 1; textPos _ textPos - 1 } ELSE { -- chars don't match; try to increment some wild card position WHILE stackPtr # 0 DO txtpos: Offset _ textPosStack[stackPtr]; txtlen: Offset _ textLenStack[stackPtr]; IF interrupt#NIL AND interrupt^ THEN GO TO Return; IF txtlen < 0 THEN { -- this is an incrementing wildcard boundary: BOOL _ FALSE; ppos: NAT; charType _ GetCharProp[GetChar[txtpos ! noMoreChars => { boundary _ TRUE; CONTINUE }]]; IF ~boundary AND (SELECT Pat[patternArray[(ppos_patternPosStack[stackPtr])-1]] FROM anyNonAlphaPat => charType # alphaNumeric, anyAlphaPat => charType = alphaNumeric, anyNonBlankPat => charType # white, anyBlankPat => charType = white, anyStringPat => TRUE, ENDCASE => ERROR) AND (patternLooks=NIL OR LooksMatch[txtpos,ppos]) THEN { patternPos _ ppos - 1; textPos _ textPosStack[stackPtr] _ txtpos - 1; EXIT }} ELSE IF txtlen > 0 THEN { -- this is a decrementing wildcard patternPos _ patternPosStack[stackPtr] - 1; textPos _ textPosStack[stackPtr] - txtlen + 1; textLenStack[stackPtr] _ txtlen - 1; EXIT } ELSE NULL; -- decrementing wildcard with no place left to go stackPtr _ stackPtr - 1; ENDLOOP; IF stackPtr = 0 THEN -- failed to match a stacked wild card IF patternAnchor < psLength AND textAnchor > start THEN { -- there was a * with no looks, so can advance it patternPos _ patternAnchor; textPos _ textAnchor _ textAnchor - 1 } ELSE EXIT --start matching over at next text location-- }}}}; ENDCASE; ENDLOOP; -- end of pattern loop atEnd _ atEnd-1; -- start over with next character ENDLOOP; -- end of text loop EXITS Return => NULL; }; CharBits: PROC [c: CHAR] RETURNS [CHAR] = INLINE { RETURN [LOOPHOLE[Basics.BITAND[LOOPHOLE[CharMask],LOOPHOLE[c]], CHAR]]; }; END.