-- TextFind3Impl.Mesa
-- last written by Paxton, February 24, 1983 10:07 am
-- last written by McGregor, February 25, 1983 3:12 pm
DIRECTORY
Inline,
LooksReader,
Rope,
RopeEdit,
RopeReader,
RunReader,
TextEdit,
TextFind,
TextFindPrivate,
TiogaLooks,
TiogaLooksSupport,
TiogaNodeOps;
TextFind3Impl: CEDAR PROGRAM
IMPORTS Inline, LooksReader, Rope, RopeEdit, RopeReader, RunReader, TextFindPrivate, TiogaLooksSupport, TiogaNodeOps
EXPORTS TextFind =
BEGIN OPEN TextFind, TextFindPrivate, RopeEdit;
-- ***** Operations *****
-- Handle used by every search procedure in this module: a REF to the finder record.
Finder: TYPE = REF FinderRec;
-- Concrete type exported for FinderRec.
-- NOTE(review): FinderRecord is not declared in this file; presumably it comes from TextFindPrivate, confirm.
FinderRec: PUBLIC TYPE = FinderRecord;
-- Raised by GetChar (local to TryToFindBackwards) when asked for a position outside the searched span.
noMoreChars: SIGNAL = CODE;
-- Searches a bare rope backwards for the pattern held by finder.
--   finder: the pattern to look for.
--   rope: the text to search; no looks information accompanies it (NIL runs are supplied).
--   start, len: the span of rope to consider.
--   interrupt: optional flag, handed on to SearchBackwards, that lets the caller abort the search.
-- Returns exactly what SearchBackwards returns for looksExact = FALSE.
SearchRopeBackwards: PUBLIC PROC [finder: Finder, rope: Rope.ROPE,
  start: Offset ← 0, len: Offset ← MaxLen, interrupt: REF BOOL ← NIL]
  RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = {
  [found, at, atEnd, before, after] ←
    SearchBackwards[finder, rope, NIL, start, len, FALSE, interrupt] };
-- Searches a text node backwards for the pattern held by finder.
--   finder: the pattern to look for.
--   text: the node; its rope and its runs are fetched with TiogaNodeOps.GetRope and TiogaNodeOps.GetRuns.
--   start, len: the span of the node's text to consider.
--   looksExact: when TRUE the looks of the text must equal the pattern looks;
--     when FALSE the text looks are first ANDed with the pattern looks (see LooksMatch in TryToFindBackwards).
--   interrupt: optional flag, handed on to SearchBackwards, that lets the caller abort the search.
-- Returns exactly what SearchBackwards returns.
TryBackwards: PUBLIC PROC [finder: Finder, text: RefTextNode,
  start: Offset ← 0, len: Offset ← MaxLen,
  looksExact: BOOLEAN ← FALSE, interrupt: REF BOOL ← NIL]
  RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = {
  [found, at, atEnd, before, after] ←
    SearchBackwards[finder, TiogaNodeOps.GetRope[text], TiogaNodeOps.GetRuns[text],
      start, len, looksExact, interrupt] };
-- Common driver behind SearchRopeBackwards and TryBackwards.
--   finder: the pattern; finder.wordSearch selects word matching.
--   rope, runs: the characters and (possibly NIL) looks runs to search.
--   start, len: the span to consider.
--   looksExact: passed through to TryToFindBackwards.
--   interrupt: optional abort flag; it is polled here and also passed to TryToFindBackwards.
-- Returns the result of TryToFindBackwards.  For a word search the match must in addition
-- satisfy IsWord; a match that fails that test is skipped and the search continues to its left.
SearchBackwards: PROC [finder: Finder, rope: ROPE, runs: TiogaLooks.Runs,
  start: Offset, len: Offset, looksExact: BOOLEAN, interrupt: REF BOOL ← NIL]
  RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = {
  IF finder.wordSearch THEN DO -- repeat search until find a word
    -- interrupt is passed down so that a long scan can be aborted from inside the matcher
    [found, at, atEnd, before, after] ←
      TryToFindBackwards[finder, rope, runs, start, len, looksExact, interrupt];
    IF ~found THEN RETURN; -- failed
    IF IsWord[rope, at, atEnd] THEN RETURN; -- got it
    -- the match is not a word; if the caller has asked us to stop, do not report it as a hit
    IF interrupt#NIL AND interrupt^ THEN { found ← FALSE; RETURN };
    len ← before-start; -- try again, looking only to the left of the rejected match
    ENDLOOP;
  -- ordinary search; the loop above leaves only by RETURN, so this is reached only when wordSearch is FALSE
  [found, at, atEnd, before, after] ←
    TryToFindBackwards[finder, rope, runs, start, len, looksExact, interrupt] };
-- Backwards pattern matcher.
-- Scans rope from the right end of the span [start, start+len) toward its left end,
-- trying to match the compiled pattern in finder from its last element to its first.
--   finder: the compiled pattern; its fields are used directly through OPEN finder.
--   rope, runs: the characters and the looks runs of the text (runs may be NIL).
--   start, len: the span to search; both are clipped to the size of rope.
--   looksExact: TRUE means text looks must equal the pattern looks,
--     FALSE means the text looks are ANDed with the pattern looks before comparing.
--   interrupt: optional flag polled inside the matching loops; when it becomes TRUE the
--     procedure returns with found = FALSE.
-- Results when found is TRUE (for the general, non looksOnly case):
--   before: text position where the matched text begins.
--   at: beginPos (position recorded for the left bracket) if leftBracketSeen, otherwise same as before.
--   atEnd: position recorded for the right bracket, or the end of the match, limited to the end of the span.
--   after: end of the whole match, limited to the end of the span.
-- The results other than found are not meaningful when found is FALSE.
-- Convention used throughout: patternPos is one greater than the index of the pattern
-- element being matched, i.e. the current element is patternArray[patternPos-1] and its
-- looks are patternLooks[patternPos-1]; textPos is one greater than the index of the
-- character being examined.
TryToFindBackwards: PROC [
  finder: Finder, rope: ROPE, runs: TiogaLooks.Runs,
  start: Offset ← 0, len: Offset ← MaxLen, looksExact: BOOLEAN ← FALSE,
  interrupt: REF BOOL ← NIL]
  RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = { OPEN finder;
  stackPtr, patternPos, patternAnchor, patternFirst: NAT ← 0;
  char, patternChar: CHAR ← 377C;
  charType: CharProperty;
  beginPos, endPos, textPos, textAnchor, end, size: Offset;
  psLength: NAT;

  -- TRUE if the character just left of txtpos has looks acceptable to pattern element ppos-1.
  LooksMatch: PROC [txtpos: Offset, ppos: NAT] RETURNS [BOOLEAN] = {
    patlks, sourcelks: TiogaLooks.Looks;
    IF (patlks ← patternLooks[ppos-1]) = TiogaLooks.noLooks THEN RETURN [TRUE];
    IF runs=NIL THEN RETURN [FALSE]; -- pattern has looks and text doesn't
    IF txtpos NOT IN (start..end] THEN RETURN [FALSE]; -- boundary char has no looks
    LooksReader.SetPosition[lksReader,runs,txtpos];
    sourcelks ← LooksReader.Backwards[lksReader];
    RETURN [patlks=(IF looksExact THEN sourcelks ELSE TiogaLooksSupport.LooksAND[sourcelks,patlks])] };

  -- Returns the character just left of txtpos; raises noMoreChars outside (start..end].
  GetChar: PROC [txtpos: Offset] RETURNS [char: CHAR] = {
    SELECT txtpos FROM
      IN (start..end] => { -- read the character from the rope
        RopeReader.SetPosition[ropeReader,rope,txtpos]; char ← RopeReader.Backwards[ropeReader] };
      --start => char ← leftBoundaryPattern;
      --end+1 => char ← rightBoundaryPattern;
      ENDCASE => SIGNAL noMoreChars }; -- failure return; have run out of characters

  PropTest: TYPE = { eq, ne, any };

  -- Counts how many consecutive characters, going left from textPos, satisfy propTest
  -- against property and also have looks acceptable to the current pattern element.
  -- Reads the enclosing textPos and patternPos and overwrites the enclosing char.
  MaxCount: PROC [propTest: PropTest, property: CharProperty ← illegal]
    RETURNS [count: INT] = {
    count ← 0;
    DO
      char ← GetChar[textPos-count ! noMoreChars => EXIT];
      IF propTest=eq THEN IF GetCharProp[char] # property THEN EXIT ELSE NULL
      ELSE IF propTest=ne AND GetCharProp[char]=property THEN EXIT ELSE NULL;
      -- the current element is patternPos-1, the same index that LooksMatch consults
      IF patternLooks # NIL AND patternLooks[patternPos-1] # TiogaLooks.noLooks
        AND ~LooksMatch[textPos-count, patternPos] THEN EXIT;
      count ← count+1;
      ENDLOOP;
    };

  -- clip the requested span to the rope
  size ← Rope.Size[rope];
  start ← MIN[MAX[0,start],size];
  len ← MIN[MAX[0,len],size-start];
  end ← start+len;
  found ← FALSE;
  atEnd ← end;

  IF looksOnly THEN {
    -- the pattern consists of looks alone: walk the runs backwards until one has the wanted looks
    lks: TiogaLooks.Looks;
    runLen: Offset;
    RunReader.SetPosition[runReader,runs,end];
    atEnd ← end;
    WHILE atEnd > start DO
      IF runs=NIL THEN { runLen ← len; lks ← TiogaLooks.noLooks }
      ELSE [runLen,lks] ← RunReader.Backwards[runReader];
      IF ~looksExact THEN lks ← TiogaLooksSupport.LooksAND[lks,looks];
      IF lks = looks THEN EXIT; -- have found a match
      atEnd ← atEnd-runLen;
      ENDLOOP;
    IF atEnd <= start THEN RETURN; -- failed to find a match
    -- report the single character at the right end of the matching run
    RETURN [TRUE,atEnd-1,atEnd,atEnd-1,atEnd] };

  psLength ← length;
  UNTIL patternFirst = psLength DO -- discard leading "any's"
    SELECT Pat[patternArray[patternFirst]] FROM
      anyStringPat, anyAlphaPat, anyNonAlphaPat, anyBlankPat, anyNonBlankPat => NULL;
      ENDCASE => EXIT;
    patternFirst ← patternFirst+1;
    ENDLOOP;
  IF patternFirst = psLength THEN RETURN [TRUE,end,end,end,end]; -- null pattern

  DO -- text loop
    -- atEnd is the candidate right end of a match; it moves left on every iteration
    IF lastPatternCharIsNormal THEN {
      IF lastPatChar1 = rightBoundaryPattern THEN {
        IF atEnd < size THEN RETURN; -- failed since not at end of node
        patternPos ← psLength-1; textPos ← atEnd }
      ELSE { -- search for next instance of last pattern char
        atEnd ← MIN[end,atEnd];
        RopeReader.SetPosition[ropeReader,rope,atEnd];
        UNTIL atEnd <= start DO
          SELECT RopeReader.Backwards[ropeReader] FROM
            lastPatChar1, lastPatChar2 =>
              IF patternLooks=NIL OR LooksMatch[atEnd,psLength] THEN EXIT;
            ENDCASE;
          atEnd ← atEnd-1;
          ENDLOOP;
        -- the last pattern element is already matched, so resume with the one before it
        patternPos ← psLength-1; textPos ← atEnd - 1 }}
    ELSE { patternPos ← psLength; textPos ← atEnd };
    IF atEnd <= start THEN EXIT;
    stackPtr ← 0;
    patternAnchor ← psLength;
    after ← endPos ← textAnchor ← atEnd;
    DO -- pattern loop
      IF patternPos <= patternFirst THEN { -- have finished pattern
        found ← TRUE;
        textPos ← MAX[start,textPos]; -- in case used initial boundary char in making the match
        atEnd ← MIN[end,endPos]; -- in case used final boundary char in making the match
        after ← MIN[after,end]; -- in case used final boundary char in making the match
        at ← IF leftBracketSeen THEN beginPos ELSE textPos;
        before ← textPos;
        GO TO Return };
      IF interrupt#NIL AND interrupt^ THEN GO TO Return;
      WITH p:patternArray[patternPos-1] SELECT FROM
        startname => { nameArray[p.index].at ← textPos; patternPos ← patternPos-1 };
        endname => { nameArray[p.index].atEnd ← textPos; patternPos ← patternPos-1 };
        not => { -- check that next character is not the one in this pattern element
          char ← GetChar[textPos ! noMoreChars => EXIT];
          SELECT patternChar ← p.char FROM
            char => IF patternLooks=NIL OR LooksMatch[textPos,patternPos] THEN EXIT;
            >= EightBit => { -- check both upper and lower case
              IF (SELECT patternChar ← CharBits[patternChar] FROM
                  IN ['A..'Z] => patternChar = UpperCase[char],
                  IN ['a ..'z] => patternChar = LowerCase[char],
                  ENDCASE => patternChar = char)
                AND (patternLooks=NIL OR LooksMatch[textPos,patternPos])
                THEN EXIT }; -- chars match
            ENDCASE;
          patternPos ← patternPos-1; textPos ← textPos - 1 };
        pattern => { SELECT patternChar ← p.char FROM
          leftBracketPattern => { beginPos ← textPos; patternPos ← patternPos-1 };
          rightBracketPattern => { endPos ← textPos; patternPos ← patternPos-1 };
          nopPattern => patternPos ← patternPos-1;
          anyStringPattern => {
            IF patternLooks # NIL AND
              patternLooks[patternPos-1] # TiogaLooks.noLooks THEN {
              stackPtr ← stackPtr + 1;
              textPosStack[stackPtr] ← textPos;
              -- a negative length marks an incrementing wildcard for the backtracking code below;
              -- set it explicitly since this slot may last have held a max wildcard's length
              textLenStack[stackPtr] ← -1;
              patternPosStack[stackPtr] ← patternPos }
            ELSE { textAnchor ← textPos; patternAnchor ← patternPos - 1; stackPtr ← 0 };
            patternPos ← patternPos - 1 };
          anyNonAlphaPattern, anyAlphaPattern, anyNonBlankPattern, anyBlankPattern => {
            stackPtr ← stackPtr + 1;
            textPosStack[stackPtr] ← textPos;
            textLenStack[stackPtr] ← -1; -- mark as an incrementing wildcard (see anyStringPattern)
            patternPosStack[stackPtr] ← patternPos;
            patternPos ← patternPos - 1 };
          maxStringPattern => {
            -- the looks of this element live at patternPos-1, as in anyStringPattern and LooksMatch
            IF patternLooks # NIL AND patternLooks[patternPos-1] # TiogaLooks.noLooks THEN {
              stackPtr ← stackPtr + 1;
              textPosStack[stackPtr] ← textPos;
              textLenStack[stackPtr] ← MaxCount[any];
              patternPosStack[stackPtr] ← patternPos }
            ELSE {
              -- no looks: the element can swallow everything down to start, and earlier frames are dropped
              stackPtr ← 1;
              textPosStack[stackPtr] ← textPos;
              textLenStack[stackPtr] ← textPos-start;
              patternPosStack[stackPtr] ← patternPos };
            textPos ← textPos - textLenStack[stackPtr];
            patternPos ← patternPos - 1 };
          maxNonAlphaPattern, maxAlphaPattern, maxNonBlankPattern, maxBlankPattern => {
            stackPtr ← stackPtr + 1;
            textPosStack[stackPtr] ← textPos;
            textLenStack[stackPtr] ← SELECT patternChar FROM
              maxNonAlphaPattern => MaxCount[ne, alphaNumeric],
              maxAlphaPattern => MaxCount[eq, alphaNumeric],
              maxNonBlankPattern => MaxCount[ne, white],
              maxBlankPattern => MaxCount[eq, white],
              ENDCASE => ERROR;
            textPos ← textPos - textLenStack[stackPtr];
            patternPosStack[stackPtr] ← patternPos;
            patternPos ← patternPos - 1 };
          ENDCASE => { -- check next character from text
            boundary: BOOL ← FALSE;
            -- running off the left edge counts as the left boundary character;
            -- it is a failure unless the pattern element is that boundary
            char ← GetChar[textPos ! noMoreChars => {
              char ← leftBoundaryPattern;
              IF patternChar # leftBoundaryPattern THEN boundary ← TRUE;
              CONTINUE }];
            IF ~boundary AND patternChar = oneCharPattern AND
              (patternLooks = NIL OR LooksMatch[textPos,patternPos]) THEN {
              IF patternPos # psLength AND patternPos = patternAnchor THEN { -- first char(s) of * segment
                patternAnchor ← patternAnchor - 1; textAnchor ← textPos - 1 };
              patternPos ← patternPos - 1; textPos ← textPos - 1 }
            ELSE {
              IF ~boundary AND (SELECT patternChar FROM
                  char => TRUE, -- this also takes care of leftBoundaryPattern
                  oneNonAlphaPattern => GetCharProp[char] # alphaNumeric,
                  oneAlphaPattern => GetCharProp[char] = alphaNumeric,
                  oneNonBlankPattern => GetCharProp[char] # white,
                  oneBlankPattern => GetCharProp[char] = white,
                  oneCharPattern => FALSE, -- known from above that looks don't match
                  >= EightBit => -- check both upper and lower case
                    SELECT patternChar ← CharBits[patternChar] FROM
                      IN ['A..'Z] => patternChar = UpperCase[char],
                      IN ['a ..'z] => patternChar = LowerCase[char],
                      ENDCASE => patternChar = char,
                  ENDCASE => FALSE)
                AND (patternLooks=NIL OR LooksMatch[textPos,patternPos])
                THEN -- chars match -- { patternPos ← patternPos - 1; textPos ← textPos - 1 }
              ELSE { -- chars don't match; try to increment some wild card position
                WHILE stackPtr # 0 DO
                  txtpos: Offset ← textPosStack[stackPtr];
                  txtlen: Offset ← textLenStack[stackPtr];
                  IF interrupt#NIL AND interrupt^ THEN GO TO Return;
                  IF txtlen < 0 THEN { -- this is an incrementing wildcard
                    boundary: BOOL ← FALSE;
                    ppos: NAT;
                    charType ← GetCharProp[GetChar[txtpos !
                      noMoreChars => { boundary ← TRUE; CONTINUE }]];
                    IF ~boundary AND
                      (SELECT Pat[patternArray[(ppos←patternPosStack[stackPtr])-1]] FROM
                        anyNonAlphaPat => charType # alphaNumeric,
                        anyAlphaPat => charType = alphaNumeric,
                        anyNonBlankPat => charType # white,
                        anyBlankPat => charType = white,
                        anyStringPat => TRUE,
                        ENDCASE => ERROR) AND
                      (patternLooks=NIL OR LooksMatch[txtpos,ppos]) THEN {
                      -- let the wildcard take one more character and resume after it
                      patternPos ← ppos - 1;
                      textPos ← textPosStack[stackPtr] ← txtpos - 1;
                      EXIT }}
                  ELSE IF txtlen > 0 THEN { -- this is a decrementing wildcard
                    -- give back one character and resume after the wildcard
                    patternPos ← patternPosStack[stackPtr] - 1;
                    textPos ← textPosStack[stackPtr] - txtlen + 1;
                    textLenStack[stackPtr] ← txtlen - 1;
                    EXIT }
                  ELSE NULL; -- decrementing wildcard with no place left to go
                  stackPtr ← stackPtr - 1;
                  ENDLOOP;
                IF stackPtr = 0 THEN -- failed to match a stacked wild card
                  IF patternAnchor < psLength AND textAnchor > start THEN {
                    -- there was a * with no looks, so can advance it
                    patternPos ← patternAnchor; textPos ← textAnchor ← textAnchor - 1 }
                  ELSE EXIT --start matching over at next text location-- }}}};
        ENDCASE;
      ENDLOOP; -- end of pattern loop
    atEnd ← atEnd-1; -- start over with next character
    ENDLOOP; -- end of text loop
  EXITS Return => NULL;
  };
-- Returns c with only the bits selected by CharMask retained.
-- The callers in this module apply it to pattern characters that compare >= EightBit.
CharBits: PROC [c: CHAR] RETURNS [CHAR] = INLINE {
  RETURN [
    LOOPHOLE[
      Inline.BITAND[c, CharMask], -- bitwise AND; operand order does not matter
      CHAR]] };
END.