-- TextFind2Impl.mesa
-- Copyright © 1985 by Xerox Corporation. All rights reserved.
-- Paxton, February 24, 1983 9:54 am
-- Russ Atkinson, July 25, 1983 3:23 pm
-- Doug Wyatt, March 3, 1985 2:53:22 pm PST
-- Michael Plass, September 3, 1985 9:23:45 am PDT
DIRECTORY
Basics USING [BITAND],
LooksReader USING [FreeLooksReader, Get, GetLooksReader, InlineGet, SetPosition, Ref],
Rope USING [Fetch, ROPE, Size],
RopeEdit USING [CharProperty, GetCharProp, LowerCase, MaxLen, UpperCase],
RopeReader USING [Get, Ref, SetIndex, SetPosition],
TextEdit USING [GetRope, GetRuns],
TextFind,
TextFindPrivate,
TextLooks USING [Looks, noLooks, Runs],
TextLooksSupport USING [LooksAND],
TextNode USING [Ref];
TextFind2Impl: CEDAR PROGRAM
IMPORTS TextFindPrivate, RopeReader, TextEdit, TextLooksSupport,
LooksReader, RopeEdit, Rope, Basics
EXPORTS TextFind, TextFindPrivate = BEGIN
OPEN TextFind;
-- Local shorthand for Rope.ROPE, used in the signatures below.
ROPE: TYPE = Rope.ROPE;
-- ***** Operations *****
SearchRope: PUBLIC PROC [finder: Finder, rope: Rope.ROPE,
  start: INT ← 0, len: INT ← RopeEdit.MaxLen, interrupt: REF BOOL ← NIL]
  RETURNS [found: BOOL, at, atEnd, before, after: INT] = {
  -- Searches a bare rope (no looks information) for the pattern held by finder.
  -- start, len: the portion of rope to search; defaults cover the whole rope.
  -- interrupt: if non-NIL, the search is abandoned once interrupt^ becomes TRUE.
  -- Delegates to Search with NIL runs and looksExact = FALSE; the five results
  -- are passed back unchanged.
  [found, at, atEnd, before, after] ← Search[finder, rope, NIL, start, len, FALSE, interrupt]
  };
Try: PUBLIC PROC [finder: Finder, text: TextNode.Ref,
  start: INT ← 0, len: INT ← RopeEdit.MaxLen,
  looksExact: BOOL ← FALSE, interrupt: REF BOOL ← NIL]
  RETURNS [found: BOOL, at, atEnd, before, after: INT] = {
  -- Searches the contents of a text node for the pattern held by finder.
  -- The node's rope and looks runs are fetched with TextEdit.GetRope and
  -- TextEdit.GetRuns and handed to Search.
  -- looksExact: TRUE requires the text looks to equal the pattern looks;
  --   FALSE only requires the pattern looks to be included in the text looks.
  -- interrupt: if non-NIL, the search is abandoned once interrupt^ becomes TRUE.
  [found, at, atEnd, before, after] ← Search[
    finder, TextEdit.GetRope[text], TextEdit.GetRuns[text], start, len, looksExact, interrupt]
  };
Search: PROC [finder: Finder, rope: ROPE, runs: TextLooks.Runs,
  start: INT, len: INT, looksExact: BOOL, interrupt: REF BOOL ← NIL]
  RETURNS [found: BOOL, at, atEnd, before, after: INT] = {
  -- Common driver for SearchRope and Try.
  -- For an ordinary finder this is a single call of TryToFind.
  -- For a word-search finder, matches that are not delimited as words
  -- (see IsWord) are rejected and the search is resumed after the rejected
  -- match, staying inside the originally requested range [start..start+len).
  IF finder.wordSearch THEN {
    size: INT ~ Rope.Size[rope];
    end: INT;
    -- clamp exactly as TryToFind does, so that the end of the range can be
    -- held fixed while start advances
    start ← MIN[MAX[0, start], size];
    len ← MIN[MAX[0, len], size-start];
    end ← start+len;
    DO -- repeat search until find a word
      [found, at, atEnd, before, after] ←
        TryToFind[finder, rope, runs, start, end-start, looksExact, interrupt];
      IF NOT found OR (interrupt#NIL AND interrupt^) THEN RETURN; -- failed
      IF IsWord[rope, at, atEnd] THEN RETURN; -- got it
      -- try again; always move forward by at least one character so that an
      -- empty match cannot be returned at the same position forever
      start ← MAX[after, start+1];
      IF start > end THEN { found ← FALSE; RETURN }; -- ran out of text
      ENDLOOP;
    };
  [found, at, atEnd, before, after] ←
    TryToFind[finder, rope, runs, start, len, looksExact, interrupt]
  };
IsWord: PUBLIC PROC [rope: ROPE, at, atEnd: INT] RETURNS [BOOL] = {
  -- TRUE iff the span [at..atEnd) of rope is not adjacent to an alphanumeric
  -- character on either side. The ends of the rope count as word boundaries.
  AlphaAt: PROC [index: INT] RETURNS [BOOL] = {
    RETURN [RopeEdit.GetCharProp[Rope.Fetch[rope, index]] = alphaNumeric]
    };
  -- AND is conditional, so the right neighbour is only examined when the
  -- left neighbour did not already disqualify the span.
  RETURN [
    NOT (at > 0 AND AlphaAt[at-1])
    AND NOT (atEnd < Rope.Size[rope] AND AlphaAt[atEnd])]
  };
TryToFind: PROC [finder: Finder, rope: ROPE, runs: TextLooks.Runs, start: INT ← 0, len: INT ← RopeEdit.MaxLen, looksExact: BOOL ← FALSE, interrupt: REF BOOL ← NIL] RETURNS [found: BOOL, at, atEnd, before, after: INT] = {
  -- Scans rope forward for the first match of the compiled pattern in finder.
  -- start and len are clamped to the rope, giving the range [start..end).
  -- runs: looks for the text, or NIL if the text has none.
  -- looksExact: TRUE requires text looks to equal the pattern looks; FALSE
  --   requires only that the pattern looks be included in the text looks.
  -- interrupt: if non-NIL and interrupt^ is TRUE, the match in progress is
  --   abandoned and the procedure returns with found = FALSE.
  -- Results when found = TRUE:
  --   at, atEnd: the matched span; they come from the pattern's left and right
  --     bracket positions when brackets were used, else the whole match.
  --   before, after: where the match attempt started and where it finished,
  --     both clamped to [start..end].
  -- When found = FALSE the other results carry no meaning.
  patternPosStack: REF TextFindPrivate.PatternStackArray ~ finder.patternPosStack;
  textPosStack: REF TextFindPrivate.TextStackArray ~ finder.textPosStack;
  textLenStack: REF TextFindPrivate.TextStackArray ~ finder.textLenStack;
  patternArray: REF TextFindPrivate.PatternArray ~ finder.patternArray;
  length: NAT ~ finder.length;
  patternLooks: REF TextFindPrivate.LooksArray ~ finder.patternLooks;
  nameArray: REF TextFindPrivate.NameArray ~ finder.nameArray;
  firstPatChar1: CHAR ~ finder.firstPatChar1;
  firstPatChar2: CHAR ~ finder.firstPatChar2;
  ropeReader: RopeReader.Ref ~ finder.ropeReader;
  lksReader: LooksReader.Ref ~ finder.lksReader;
  looks: TextLooks.Looks ~ finder.looks;
  looksOnly: BOOL ~ finder.looksOnly;
  rightBracketSeen: BOOL ← finder.rightBracketSeen;
  firstPatternCharIsNormal: BOOL ~ finder.firstPatternCharIsNormal;
  -- stackPtr indexes the three parallel wildcard stacks; 0 means empty.
  -- patternAnchor/textAnchor remember the most recent looks-free "any string"
  -- wildcard, which is advanced directly instead of being stacked.
  stackPtr, patternPos, patternAnchor: NAT ← 0;
  char, patternChar: CHAR ← 377C;
  charType: RopeEdit.CharProperty;
  beginPos, endPos, textPos, textAnchor, end, size: INT;
  psLength: NAT; -- pattern length with trailing "any" wildcards removed

  LooksMatch: PROC [txtpos: INT, ppos: NAT] RETURNS [BOOL] = {
    -- TRUE iff the looks of the text character at txtpos satisfy the looks
    -- required by pattern element ppos.
    patlks, sourcelks: TextLooks.Looks;
    IF (patlks ← patternLooks[ppos]) = TextLooks.noLooks THEN RETURN [TRUE];
    IF runs=NIL THEN RETURN [FALSE]; -- pattern has looks and text doesn't
    IF txtpos NOT IN [start..end) THEN RETURN [FALSE]; -- boundary char has no looks
    LooksReader.SetPosition[lksReader,runs,txtpos];
    sourcelks ← LooksReader.Get[lksReader];
    RETURN [patlks=(IF looksExact THEN sourcelks ELSE TextLooksSupport.LooksAND[sourcelks,patlks])]
    };

  GetChar: PROC [txtpos: INT] RETURNS [char: CHAR] = {
    -- Fetches the character at txtpos, which must lie in [start..end), and
    -- leaves ropeReader positioned just after it.
    SELECT txtpos FROM
      IN [start..end) => { -- read the character from the rope
        char ← Rope.Fetch[rope, txtpos];
        RopeReader.SetPosition[ropeReader,rope,txtpos+1];
        };
      ENDCASE => ERROR;
    };

  PropTest: TYPE = { eq, ne, any };

  MaxCount: PROC [propTest: PropTest, property: RopeEdit.CharProperty ← illegal]
    RETURNS [count: INT] = {
    -- Counts how many consecutive characters starting at textPos satisfy the
    -- property test (eq: has property, ne: lacks it, any: no test) and also
    -- satisfy the looks of the current pattern element.
    count ← 0;
    DO
      IF textPos+count NOT IN [start..end) THEN EXIT;
      char ← GetChar[textPos+count];
      IF propTest=eq THEN IF RopeEdit.GetCharProp[char] # property THEN EXIT ELSE NULL
      ELSE IF propTest=ne AND RopeEdit.GetCharProp[char]=property THEN EXIT ELSE NULL;
      IF patternLooks # NIL AND patternLooks[patternPos] # TextLooks.noLooks
        AND NOT LooksMatch[textPos+count, patternPos] THEN EXIT;
      count ← count+1;
      ENDLOOP;
    };

  -- clamp the requested range to the rope
  size ← Rope.Size[rope];
  start ← MIN[MAX[0,start],size];
  len ← MIN[MAX[0,len],size-start];
  end ← start+len;
  found ← FALSE;

  IF looksOnly THEN {
    -- the pattern is looks with no characters: find the first character in
    -- range whose looks match
    looksReader: LooksReader.Ref ~ LooksReader.GetLooksReader[];
    LooksReader.SetPosition[looksReader, runs, start];
    at ← start;
    WHILE at < end DO
      lks: TextLooks.Looks ← LooksReader.InlineGet[looksReader];
      IF NOT looksExact THEN lks ← TextLooksSupport.LooksAND[lks, looks];
      IF lks = looks THEN EXIT; -- have found a match
      at ← at+1;
      ENDLOOP;
    LooksReader.FreeLooksReader[looksReader];
    IF at >= end THEN RETURN; -- failed to find a match
    RETURN [TRUE, at, at+1, at, at+1]
    };

  psLength ← length;
  UNTIL psLength = 0 DO -- discard trailing "any's"
    SELECT TextFindPrivate.Pat[patternArray[psLength-1]] FROM
      TextFindPrivate.anyStringPat, TextFindPrivate.anyAlphaPat, TextFindPrivate.anyNonAlphaPat, TextFindPrivate.anyBlankPat, TextFindPrivate.anyNonBlankPat => NULL;
      ENDCASE => EXIT;
    psLength ← psLength-1;
    ENDLOOP;
  IF psLength=0 THEN RETURN [TRUE,start,start,start,start]; -- null pattern

  at ← start;
  RopeReader.SetPosition[ropeReader, rope, at];
  DO -- text loop
    IF firstPatternCharIsNormal THEN {
      IF firstPatChar1 = TextFindPrivate.leftBoundaryPattern THEN {
        IF at > 0 THEN RETURN; -- failure since not at left boundary
        patternPos ← 1; textPos ← 0
        }
      ELSE { -- search for next instance of first pattern char
        at ← MAX[start,at];
        RopeReader.SetIndex[ropeReader, at];
        UNTIL at >= end DO
          SELECT RopeReader.Get[ropeReader] FROM
            firstPatChar1, firstPatChar2 =>
              IF patternLooks=NIL OR LooksMatch[at,0] THEN EXIT;
            ENDCASE;
          at ← at+1;
          ENDLOOP;
        patternPos ← 1; textPos ← at + 1
        }
      }
    ELSE { patternPos ← 0; textPos ← at };
    IF at >= end THEN EXIT;
    stackPtr ← patternAnchor ← 0;
    before ← beginPos ← textAnchor ← at;
    DO -- pattern loop
      IF patternPos >= psLength THEN { -- have finished pattern
        found ← TRUE;
        textPos ← MIN[end,textPos]; -- in case used final boundary char in making the match
        at ← MAX[start,beginPos]; -- in case used initial boundary char in making the match
        before ← MAX[before,start]; -- in case used initial boundary char in making the match
        atEnd ← IF rightBracketSeen THEN endPos ELSE textPos;
        after ← textPos;
        GO TO Return
        };
      IF interrupt#NIL AND interrupt^ THEN GO TO Return;
      WITH p:patternArray[patternPos] SELECT FROM
        startname => { nameArray[p.index].at ← textPos; patternPos ← patternPos+1 };
        endname => { nameArray[p.index].atEnd ← textPos; patternPos ← patternPos+1 };
        not => { -- check that next character is not the one in this pattern element
          IF textPos NOT IN [start..end) THEN EXIT;
          char ← GetChar[textPos];
          SELECT patternChar ← p.char FROM
            char => IF patternLooks=NIL OR LooksMatch[textPos,patternPos] THEN EXIT;
            >= TextFindPrivate.EightBit => { -- check both upper and lower case
              IF (SELECT patternChar ← CharBits[patternChar] FROM
                IN ['A..'Z] => patternChar = RopeEdit.UpperCase[char],
                IN ['a ..'z] => patternChar = RopeEdit.LowerCase[char],
                ENDCASE => patternChar = char)
                AND (patternLooks=NIL OR LooksMatch[textPos,patternPos])
                THEN EXIT
              }; -- chars match
            ENDCASE;
          patternPos ← patternPos+1; textPos ← textPos + 1
          };
        pattern => { SELECT patternChar ← p.char FROM
          TextFindPrivate.leftBracketPattern => { beginPos ← textPos; patternPos ← patternPos+1 };
          TextFindPrivate.rightBracketPattern => { endPos ← textPos; patternPos ← patternPos+1 };
          TextFindPrivate.nopPattern => patternPos ← patternPos+1;
          TextFindPrivate.anyStringPattern => {
            IF patternLooks # NIL AND patternLooks[patternPos] # TextLooks.noLooks THEN {
              -- looks must be checked per character, so use an incrementing
              -- stack entry (length -1)
              stackPtr ← stackPtr + 1;
              textPosStack[stackPtr] ← textPos;
              textLenStack[stackPtr] ← -1;
              patternPosStack[stackPtr] ← patternPos
              }
            ELSE { textAnchor ← textPos; patternAnchor ← patternPos + 1; stackPtr ← 0 };
            patternPos ← patternPos + 1
            };
          TextFindPrivate.anyNonAlphaPattern, TextFindPrivate.anyAlphaPattern, TextFindPrivate.anyNonBlankPattern, TextFindPrivate.anyBlankPattern => {
            -- shortest-match wildcard: starts empty and grows on backtrack
            stackPtr ← stackPtr + 1;
            textPosStack[stackPtr] ← textPos;
            textLenStack[stackPtr] ← -1;
            patternPosStack[stackPtr] ← patternPos;
            patternPos ← patternPos + 1
            };
          TextFindPrivate.maxStringPattern => {
            -- longest-match wildcard: starts at maximum length and shrinks on backtrack
            IF patternLooks # NIL AND patternLooks[patternPos] # TextLooks.noLooks THEN {
              stackPtr ← stackPtr + 1;
              textPosStack[stackPtr] ← textPos;
              textLenStack[stackPtr] ← MaxCount[any];
              patternPosStack[stackPtr] ← patternPos
              }
            ELSE {
              stackPtr ← 1;
              textPosStack[stackPtr] ← textPos;
              textLenStack[stackPtr] ← end-textPos;
              patternPosStack[stackPtr] ← patternPos
              };
            textPos ← textPos + textLenStack[stackPtr];
            patternPos ← patternPos + 1
            };
          TextFindPrivate.maxNonAlphaPattern, TextFindPrivate.maxAlphaPattern, TextFindPrivate.maxNonBlankPattern, TextFindPrivate.maxBlankPattern => {
            stackPtr ← stackPtr + 1;
            textPosStack[stackPtr] ← textPos;
            textLenStack[stackPtr] ← SELECT patternChar FROM
              TextFindPrivate.maxNonAlphaPattern => MaxCount[ne, alphaNumeric],
              TextFindPrivate.maxAlphaPattern => MaxCount[eq, alphaNumeric],
              TextFindPrivate.maxNonBlankPattern => MaxCount[ne, white],
              TextFindPrivate.maxBlankPattern => MaxCount[eq, white],
              ENDCASE => ERROR;
            textPos ← textPos + textLenStack[stackPtr];
            patternPosStack[stackPtr] ← patternPos;
            patternPos ← patternPos + 1
            };
          ENDCASE => { -- check next character from text
            boundary: BOOL ← FALSE;
            IF textPos IN [start..end) THEN {char ← GetChar[textPos]}
            ELSE {
              char ← TextFindPrivate.rightBoundaryPattern;
              IF patternChar # TextFindPrivate.rightBoundaryPattern THEN boundary ← TRUE;
              };
            IF NOT boundary AND patternChar = TextFindPrivate.oneCharPattern AND
              (patternLooks = NIL OR LooksMatch[textPos,patternPos]) THEN {
              IF patternPos # 0 AND patternPos = patternAnchor THEN {
                -- first char(s) of * segment
                patternAnchor ← patternAnchor + 1; textAnchor ← textPos + 1
                };
              patternPos ← patternPos + 1; textPos ← textPos + 1
              }
            ELSE {
              IF NOT boundary AND (SELECT patternChar FROM
                char => TRUE, -- this also takes care of rightBoundaryPattern
                TextFindPrivate.oneNonAlphaPattern => RopeEdit.GetCharProp[char] # alphaNumeric,
                TextFindPrivate.oneAlphaPattern => RopeEdit.GetCharProp[char] = alphaNumeric,
                TextFindPrivate.oneNonBlankPattern => RopeEdit.GetCharProp[char] # white,
                TextFindPrivate.oneBlankPattern => RopeEdit.GetCharProp[char] = white,
                TextFindPrivate.oneCharPattern => FALSE, -- known from above that looks don't match
                >= TextFindPrivate.EightBit => -- check both upper and lower case
                  SELECT patternChar ← CharBits[patternChar] FROM
                    IN ['A..'Z] => patternChar = RopeEdit.UpperCase[char],
                    IN ['a ..'z] => patternChar = RopeEdit.LowerCase[char],
                    ENDCASE => patternChar = char,
                ENDCASE => FALSE)
                AND (patternLooks=NIL OR LooksMatch[textPos,patternPos])
              THEN -- chars match -- {
                patternPos ← patternPos + 1; textPos ← textPos + 1
                }
              ELSE { -- chars don't match; try to change some wild card position
                WHILE stackPtr # 0 DO
                  txtpos: INT ← textPosStack[stackPtr];
                  txtlen: INT ← textLenStack[stackPtr];
                  IF interrupt#NIL AND interrupt^ THEN GO TO Return;
                  IF txtlen < 0 THEN { -- this is an incrementing wildcard
                    ppos: NAT;
                    boundary ← FALSE;
                    IF txtpos IN [start..end) THEN {
                      charType ← RopeEdit.GetCharProp[GetChar[txtpos]];
                      }
                    ELSE {
                      boundary ← TRUE
                      };
                    IF NOT boundary AND
                      (SELECT TextFindPrivate.Pat[patternArray[ppos←patternPosStack[stackPtr]]] FROM
                        TextFindPrivate.anyNonAlphaPat => charType # alphaNumeric,
                        TextFindPrivate.anyAlphaPat => charType = alphaNumeric,
                        TextFindPrivate.anyNonBlankPat => charType # white,
                        TextFindPrivate.anyBlankPat => charType = white,
                        TextFindPrivate.anyStringPat => TRUE,
                        ENDCASE => ERROR)
                      AND (patternLooks=NIL OR LooksMatch[txtpos,ppos]) THEN {
                      patternPos ← ppos + 1;
                      textPos ← textPosStack[stackPtr] ← txtpos + 1;
                      EXIT
                      }
                    }
                  ELSE IF txtlen > 0 THEN { -- this is a decrementing wildcard
                    patternPos ← patternPosStack[stackPtr] + 1;
                    textPos ← textPosStack[stackPtr] + txtlen - 1;
                    textLenStack[stackPtr] ← txtlen - 1;
                    EXIT
                    }
                  ELSE NULL; -- decrementing wildcard with no place left to go
                  stackPtr ← stackPtr - 1;
                  ENDLOOP;
                IF stackPtr = 0 THEN -- failed to match a stacked wild card
                  IF patternAnchor > 0 AND textAnchor < end THEN {
                    -- there was a * with no looks, so can advance it
                    patternPos ← patternAnchor;
                    textPos ← textAnchor ← textAnchor + 1
                    }
                  ELSE EXIT --start matching over at next text location--
                }
              }
            }
          };
        ENDCASE;
      ENDLOOP; -- end of pattern loop
    at ← at+1; -- start over with next character
    ENDLOOP; -- end of text loop
  EXITS Return => NULL;
  }; -- of TryToFind --
-- Returns c with its bits ANDed against TextFindPrivate.CharMask.
-- NOTE(review): callers apply this to pattern characters that test
-- >= TextFindPrivate.EightBit before comparing them with 'A..'Z / 'a..'z, so the
-- mask presumably strips a flag bit marking case-insensitive pattern
-- characters -- confirm against TextFindPrivate.
CharBits: PROC [c: CHAR] RETURNS [CHAR] = INLINE {
RETURN [LOOPHOLE[Basics.BITAND[LOOPHOLE[TextFindPrivate.CharMask],LOOPHOLE[c]], CHAR]];
};
END.