DIRECTORY
LooksReader USING [Create, Get, SetPosition],
Rope USING [Cat, Equal, Fetch, FromChar, Map, ROPE, Size, Substr],
RopeEdit USING [AlphaNumericChar, BlankChar, LowerCase, MaxLen, Offset, UpperCase],
RopeReader USING [Create, Get, GetIndex, GetRope, Peek, ReadOffEnd, SetPosition],
RunReader USING [Create, Get, GetIndex, GetRuns, NoMoreRuns, SetPosition],
TextEdit USING [GetRope, GetRuns],
TextFind USING [PatternErrorCode, RefTextNode],
TextFindPrivate USING [anyAlphaPattern, anyBlankPattern, anyNonAlphaPattern, anyNonBlankPattern, anyStringPattern, FinderRecord, leftBoundaryPattern, leftBracketPattern, LooksArray, maxAlphaPattern, maxBlankPattern, maxNonAlphaPattern, maxNonBlankPattern, MaxPatternLength, maxStringPattern, NameArray, nopPattern, oneAlphaPattern, oneBlankPattern, oneCharPattern, oneNonAlphaPattern, oneNonBlankPattern, PatternArray, PatternStackArray, rightBoundaryPattern, rightBracketPattern, TextStackArray],
TextLooks USING [FetchLooks, Looks, noLooks, Runs],
TextLooksSupport USING [],
TextNode USING [pZone];
-- TextFindImpl: implementation module that exports the TextFind interface.
-- The procedures below build Finder records from search patterns and answer queries about named subpatterns.
TextFindImpl:
CEDAR
PROGRAM
IMPORTS TextEdit, TextLooks, TextLooksSupport,
LooksReader, RopeEdit, RopeReader, TextNode,
RunReader, Rope
EXPORTS TextFind = { OPEN TextFind;
-- Local shorthand for the rope type.
ROPE: TYPE ~ Rope.ROPE;
-- Raised while a pattern is being compiled; ec identifies the problem (codes used in this module include toobig, endquote, endtilda, boundary, missingNameEnd, unmatchedNameEnd).
MalformedPattern: PUBLIC ERROR [ec:PatternErrorCode] = CODE;
-- A Finder is a reference to the record that CreateFromParts allocates and fills in.
Finder: TYPE = REF FinderRec;
-- Concrete representation of the exported FinderRec type.
FinderRec: PUBLIC TYPE = TextFindPrivate.FinderRecord;
-- ***** Operations *****
NameLoc: PUBLIC PROC [finder: Finder, name: ROPE]
  RETURNS [at, atEnd: RopeEdit.Offset] = {
  -- Looks for an entry called name in the name table of finder and returns the
  -- at/atEnd offsets stored in the first matching entry.
  -- Returns [0, 0] when finder is NIL, when it has no name table, or when no entry matches.
  IF finder # NIL AND finder.nameArray # NIL THEN
    FOR slot: NAT IN [0..finder.nameArray.length) DO
      IF Rope.Equal[finder.nameArray[slot].name, name] THEN
        RETURN [finder.nameArray[slot].at, finder.nameArray[slot].atEnd];
      ENDLOOP;
  RETURN [0, 0]
  };
NameLooks: PUBLIC PROC [finder: Finder, name: ROPE]
  RETURNS [looks: TextLooks.Looks] = {
  -- Looks for an entry called name in the name table of finder and returns the
  -- looks stored in the first matching entry.
  -- Returns TextLooks.noLooks when finder is NIL, when it has no name table, or when no entry matches.
  IF finder # NIL AND finder.nameArray # NIL THEN
    FOR slot: NAT IN [0..finder.nameArray.length) DO
      IF Rope.Equal[finder.nameArray[slot].name, name] THEN
        RETURN [finder.nameArray[slot].looks];
      ENDLOOP;
  RETURN [TextLooks.noLooks]
  };
Create: PUBLIC PROC [pattern: RefTextNode,
  literal, word, ignoreLooks, ignoreCase, addBounds: BOOLEAN ← FALSE,
  patternStart: RopeEdit.Offset ← 0, patternLen: RopeEdit.Offset ← RopeEdit.MaxLen]
  RETURNS [finder: Finder] = {
  -- Builds a Finder from a text node: the rope and the looks runs of the node are
  -- fetched with TextEdit and handed, together with all options, to CreateFromParts.
  finder ← CreateFromParts[
    patternRope: TextEdit.GetRope[pattern],
    patternRuns: TextEdit.GetRuns[pattern],
    literal: literal,
    word: word,
    ignoreLooks: ignoreLooks,
    ignoreCase: ignoreCase,
    addBounds: addBounds,
    patternStart: patternStart,
    patternLen: patternLen];
  };
CreateFromRope: PUBLIC PROC [pattern: ROPE,
  literal, word, ignoreCase, addBounds: BOOLEAN ← FALSE,
  patternStart: RopeEdit.Offset ← 0, patternLen: RopeEdit.Offset ← RopeEdit.MaxLen]
  RETURNS [finder: Finder] = {
  -- Builds a Finder from a bare rope. There are no looks runs, so NIL is passed
  -- for the runs and ignoreLooks is forced to TRUE.
  finder ← CreateFromParts[
    patternRope: pattern,
    patternRuns: NIL,
    literal: literal,
    word: word,
    ignoreLooks: TRUE,
    ignoreCase: ignoreCase,
    addBounds: addBounds,
    patternStart: patternStart,
    patternLen: patternLen];
  };
-- CreateFromParts: compiles a pattern (rope plus optional looks runs) into a newly allocated Finder.
--   patternRope, patternRuns: characters of the pattern and their looks; the runs are only read when they are non-NIL and ignoreLooks is FALSE.
--   literal: every pattern char is compiled with LitChar instead of being interpreted by PatChar.
--   word: sets wordSearch in the finder.
--   ignoreCase: passed to the per-character compilers; letters are then marked to match either case.
--   addBounds: a | is added to both ends of the pattern (special chars are quoted first when literal is TRUE).
--   patternStart, patternLen: the part of the rope to use; both are clamped to the size of the rope.
-- Raises MalformedPattern when the pattern is too long or syntactically wrong.
CreateFromParts:
PROC [patternRope:
ROPE, patternRuns: TextLooks.Runs,
literal, word, ignoreLooks, ignoreCase, addBounds:
BOOLEAN ←
FALSE,
patternStart: RopeEdit.Offset ← 0, patternLen: RopeEdit.Offset ← RopeEdit.MaxLen]
RETURNS [finder: Finder] = {
NewLooks: PROC [num: NAT] RETURNS [array: REF TextFindPrivate.LooksArray] = {
  -- Allocates a looks array with num entries from TextNode.pZone and
  -- sets every entry to TextLooks.noLooks.
  array ← TextNode.pZone.NEW[TextFindPrivate.LooksArray[num]];
  FOR slot: NAT IN [0..num) DO
    array[slot] ← TextLooks.noLooks;
    ENDLOOP;
  RETURN [array]
  };
-- Working state shared by the nested procedures of CreateFromParts.
-- NOTE(review): char and patternChar are not referenced by these names in the visible code (char is shadowed by parameters and loop locals); confirm before removing.
char, patternChar: CHAR ← 377C;
-- size of patternRope, taken after the optional quoting and boundary rewriting
pLen: RopeEdit.Offset;
-- patternLength: number of compiled entries; starts at the clamped patternLen and is reduced whenever several source chars compile into one entry.
-- plen: the clamped patternLen, i.e. the pattern length in source chars.
-- psIndex: index of the patternArray entry being filled.
-- nameCount: number of named subpatterns closed so far.
patternLength, plen, psIndex, nameCount: NAT ← 0;
nameList: LIST OF Rope.ROPE; -- in reverse order of appearance
-- looks of the < that opened each name, kept parallel to nameList
nameLooksList: LIST OF TextLooks.Looks;
nameLooks: TextLooks.Looks;
-- TRUE between a < and its matching > while the pattern is being unpacked
insideNamedPat: BOOLEAN ← FALSE;
IF addBounds THEN {
  -- add |'s to both ends of pattern
  IF literal THEN {
    -- A literal pattern is about to be interpreted (because of the added |'s), so
    -- put quotes before special chars in the pattern and turn literal off.
    new: Rope.ROPE;
    AddQuotes: SAFE PROC [c: CHAR] RETURNS [stop: BOOL] = TRUSTED {
      -- Rope.Map action: appends c to new, preceded by a quote when c is special.
      -- Always returns FALSE so that the whole rope is visited.
      IF ~RopeEdit.BlankChar[c] AND ~RopeEdit.AlphaNumericChar[c] THEN
        new ← Rope.Cat[new, "'"]; -- quote chars that are not blank or alpha or digit
      new ← Rope.Cat[new, Rope.FromChar[c]];
      RETURN [FALSE]
      };
    [] ← Rope.Map[base: patternRope, action: AddQuotes];
    patternRope ← new; literal ← FALSE;
    };
  patternRope ← Rope.Cat["|", Rope.Cat[patternRope, "|"]];
  -- The closing brace below was missing: without it the size clamping and the
  -- rest of the procedure were swallowed by the addBounds branch and the braces
  -- of the procedure did not balance.
  };
-- Clamp the requested range to the rope: patternStart may not exceed the size, and patternLen may not run past the end.
pLen ← Rope.Size[patternRope];
patternStart ← MIN[patternStart,pLen];
-- Reject patterns longer than the private limit.
IF (patternLen ← MIN[patternLen,pLen-patternStart]) > TextFindPrivate.MaxPatternLength THEN ERROR MalformedPattern[toobig];
-- Both counters start at the source length; patternLength is reduced later as the pattern is unpacked.
patternLength ← plen ← patternLen;
finder ← TextNode.pZone.NEW[FinderRec];
{
-- From here to the end of the procedure the fields of the new finder record are directly visible.
OPEN finder;
-- Signature shared by the per-character compilers LitChar, NotChar and PatChar.
PatternProc: TYPE = PROC [char: CHAR, looks: TextLooks.Looks, ignoreCase: BOOLEAN];
-- Compiler applied to every char read from the pattern: literal patterns bypass the special-char interpretation.
patProc: PatternProc = IF literal THEN LitChar ELSE PatChar;
GetLooks: PROC RETURNS [lks: TextLooks.Looks] = {
  -- Returns the next looks from lksReader.
  -- Yields TextLooks.noLooks when there is no looks reader or when the reader
  -- signals RunReader.NoMoreRuns.
  lks ← TextLooks.noLooks;
  IF lksReader = NIL THEN RETURN;
  -- if the runs are exhausted the assignment is abandoned and lks keeps noLooks
  lks ← LooksReader.Get[lksReader ! RunReader.NoMoreRuns => CONTINUE];
  };
LitChar: PatternProc = TRUSTED {
  -- Compiles char as an ordinary character that must match itself at patternArray[psIndex].
  --   looks: recorded in patternLooks[psIndex] when not noLooks (the looks array is allocated on first use).
  --   ignoreCase: when TRUE and char is a letter, the entry is marked so that either case matches.
  -- Also records the first/last pattern char in the finder when this entry is at
  -- index 0 or at index patternLength-1.
  IF looks # TextLooks.noLooks THEN {
    IF patternLooks = NIL THEN patternLooks ← NewLooks[patternLength];
    patternLooks[psIndex] ← looks;
    };
  IF ignoreCase AND (char IN ['A..'Z] OR char IN ['a..'z]) THEN {
    patternArray[psIndex] ← [pattern[char+200B]];
    -- 200B tells matcher to check both upper and lower
    -- (this line had lost its comment marker and was being read as code)
    IF psIndex = 0 THEN {
      firstPatternCharIsNormal ← TRUE;
      firstPatChar1 ← RopeEdit.UpperCase[char];
      firstPatChar2 ← RopeEdit.LowerCase[char];
      };
    IF psIndex = patternLength-1 THEN {
      lastPatternCharIsNormal ← TRUE;
      lastPatChar1 ← RopeEdit.UpperCase[char];
      lastPatChar2 ← RopeEdit.LowerCase[char];
      }
    }
  ELSE {
    -- exact match only: the second alternative is set to 0C
    patternArray[psIndex] ← [pattern[char]];
    IF psIndex = patternLength-1 THEN {
      lastPatternCharIsNormal ← TRUE;
      lastPatChar1 ← char;
      lastPatChar2 ← 0C;
      };
    IF psIndex = 0 THEN {
      firstPatternCharIsNormal ← TRUE;
      firstPatChar1 ← char;
      firstPatChar2 ← 0C;
      }
    }
  };
NotChar: PatternProc = TRUSTED {
  -- Compiles a negated character at patternArray[psIndex].
  -- Non-empty looks are stored in patternLooks[psIndex], allocating the looks
  -- array on first use. With ignoreCase the char is stored with 200B added.
  IF looks # TextLooks.noLooks THEN {
    IF patternLooks = NIL THEN patternLooks ← NewLooks[patternLength];
    patternLooks[psIndex] ← looks;
    };
  patternArray[psIndex] ← [not[IF ignoreCase THEN char+200B ELSE char]];
  };
PatChar: PatternProc = TRUSTED {
  -- Compiles one char of a non-literal pattern into patternArray[psIndex],
  -- interpreting the special chars:
  --   '  quote: the next char is taken literally
  --   ~  negation of the following char or class (% $ @ & then mean non-blank and non-alpha)
  --   #  any one char            *  any string
  --   %  one blank               $  any blanks
  --   @  one alpha               &  any alphas
  --   |  boundary (only as first or last entry)
  --   <name:pattern> or <name>   named subpattern
  --   { }  brackets
  -- A repeated any-class char directly after the same any-class entry with the same
  -- looks turns that entry into the corresponding max pattern instead of adding a new entry.
  -- patternLength is reduced whenever more than one source char produces a single entry.
  -- Raises MalformedPattern for a trailing quote or tilde, a misplaced boundary, or bad name brackets.
  --
  -- The end-of-pattern tests compare the reader index with patternStart+plen.
  -- RopeReader.GetIndex is an index into patternRope (it is used that way for
  -- nameStart below), so comparing it with plen alone was wrong for patternStart > 0.
  IF looks # TextLooks.noLooks AND patternLooks = NIL THEN
    patternLooks ← NewLooks[patternLength];
  IF patternLooks # NIL THEN patternLooks[psIndex] ← looks;
  SELECT char FROM
    '' => {
      -- quoted char: two source chars give one literal entry
      IF RopeReader.GetIndex[ropeReader] >= patternStart+plen THEN ERROR MalformedPattern[endquote];
      patternLength ← patternLength-1;
      LitChar[RopeReader.Get[ropeReader],GetLooks[],FALSE]
      };
    IN ['A .. 'Z], IN ['a .. 'z] => LitChar[char,looks,ignoreCase];
    '~ => {
      -- negation: read the char being negated, together with its looks
      IF RopeReader.GetIndex[ropeReader] >= patternStart+plen THEN ERROR MalformedPattern[endtilda];
      patternLength ← patternLength-1;
      char ← RopeReader.Get[ropeReader];
      looks ← GetLooks[];
      SELECT char FROM
        '' => {
          IF RopeReader.GetIndex[ropeReader] >= patternStart+plen THEN
            ERROR MalformedPattern[endquote];
          patternLength ← patternLength-1;
          NotChar[RopeReader.Get[ropeReader],GetLooks[],FALSE]
          };
        IN ['A .. 'Z], IN ['a .. 'z] => NotChar[char,looks,ignoreCase];
        '% => patternArray[psIndex] ← [pattern[TextFindPrivate.oneNonBlankPattern]];
        '$ =>
          IF psIndex > 0 AND patternArray[psIndex-1]=[pattern[TextFindPrivate.anyNonBlankPattern]] AND
            (patternLooks=NIL OR patternLooks[psIndex-1]=looks)
          THEN {
            -- change to max
            patternLength ← patternLength-1;
            psIndex ← psIndex-1;
            patternArray[psIndex] ← [pattern[TextFindPrivate.maxNonBlankPattern]];
            }
          ELSE {
            -- new entry
            patternArray[psIndex] ← [pattern[TextFindPrivate.anyNonBlankPattern]];
            stackSize ← stackSize+1;
            };
        '@ => patternArray[psIndex] ← [pattern[TextFindPrivate.oneNonAlphaPattern]];
        '& =>
          IF psIndex > 0 AND patternArray[psIndex-1]=[pattern[TextFindPrivate.anyNonAlphaPattern]] AND
            (patternLooks=NIL OR patternLooks[psIndex-1]=looks)
          THEN {
            -- change to max
            patternLength ← patternLength-1;
            psIndex ← psIndex-1;
            patternArray[psIndex] ← [pattern[TextFindPrivate.maxNonAlphaPattern]]
            }
          ELSE {
            -- new entry
            patternArray[psIndex] ← [pattern[TextFindPrivate.anyNonAlphaPattern]];
            stackSize ← stackSize+1;
            };
        ENDCASE => patternArray[psIndex] ← [not[char]];
      };
    '# => patternArray[psIndex] ← [pattern[TextFindPrivate.oneCharPattern]];
    '* =>
      IF psIndex > 0 AND patternArray[psIndex-1]=[pattern[TextFindPrivate.anyStringPattern]] AND
        (patternLooks=NIL OR patternLooks[psIndex-1]=looks)
      THEN {
        -- change to max
        psIndex ← psIndex-1;
        patternLength ← patternLength-1;
        stackSize ← MAX[1, stackSize];
        patternArray[psIndex] ← [pattern[TextFindPrivate.maxStringPattern]];
        }
      ELSE {
        -- new entry
        patternArray[psIndex] ← [pattern[TextFindPrivate.anyStringPattern]];
        IF looks # TextLooks.noLooks THEN stackSize ← stackSize+1;
        };
    '% => patternArray[psIndex] ← [pattern[TextFindPrivate.oneBlankPattern]];
    '$ =>
      IF psIndex > 0 AND patternArray[psIndex-1]=[pattern[TextFindPrivate.anyBlankPattern]] AND
        (patternLooks=NIL OR patternLooks[psIndex-1]=looks)
      THEN {
        -- change to max
        patternLength ← patternLength-1;
        psIndex ← psIndex-1;
        patternArray[psIndex] ← [pattern[TextFindPrivate.maxBlankPattern]]
        }
      ELSE {
        -- new entry
        patternArray[psIndex] ← [pattern[TextFindPrivate.anyBlankPattern]];
        stackSize ← stackSize+1
        };
    '@ => patternArray[psIndex] ← [pattern[TextFindPrivate.oneAlphaPattern]];
    '& =>
      IF psIndex > 0 AND patternArray[psIndex-1]=[pattern[TextFindPrivate.anyAlphaPattern]] AND
        (patternLooks=NIL OR patternLooks[psIndex-1]=looks)
      THEN {
        -- change to max
        patternLength ← patternLength-1;
        psIndex ← psIndex-1;
        patternArray[psIndex] ← [pattern[TextFindPrivate.maxAlphaPattern]]
        }
      ELSE {
        -- new entry
        patternArray[psIndex] ← [pattern[TextFindPrivate.anyAlphaPattern]];
        stackSize ← stackSize+1;
        };
    '| => {
      patternArray[psIndex] ←
        [pattern[IF psIndex = 0 THEN TextFindPrivate.leftBoundaryPattern ELSE TextFindPrivate.rightBoundaryPattern]];
      -- a boundary is only legal as the first or the last entry
      IF psIndex # 0 AND psIndex # patternLength-1 THEN
        ERROR MalformedPattern[boundary];
      IF psIndex = patternLength-1 THEN {
        -- right boundary
        lastPatternCharIsNormal ← TRUE; lastPatChar1 ← TextFindPrivate.rightBoundaryPattern;
        };
      IF psIndex = 0 THEN {
        -- left boundary
        firstPatternCharIsNormal ← TRUE; firstPatChar1 ← TextFindPrivate.leftBoundaryPattern;
        }
      };
    '< => {
      nameStart: RopeEdit.Offset ← RopeReader.GetIndex[ropeReader]; -- index of char after the <
      nameLen: RopeEdit.Offset ← 0;
      IF insideNamedPat THEN ERROR MalformedPattern[missingNameEnd];
      insideNamedPat ← TRUE;
      nameLooks ← looks; -- remember the looks of the <
      patternArray[psIndex] ← [startname[nameCount]];
      DO
        SELECT RopeReader.Peek[ropeReader !
          RopeReader.ReadOffEnd => GOTO BadName] FROM -- scan to end of name
          ': => {
            -- pattern follows; the name chars and the colon produce no entries
            [] ← RopeReader.Get[ropeReader];
            [] ← GetLooks[];
            patternLength ← patternLength-(nameLen+1);
            EXIT;
            };
          '> => {
            -- no pattern given, so insert a phony *
            -- the > itself is left unread so that the caller's loop compiles the end of the name
            psIndex ← psIndex + 1;
            PatChar['*,looks,ignoreCase]; -- use looks from the '<
            patternLength ← patternLength-nameLen+1;
            EXIT;
            };
          ENDCASE => { -- part of the name
            nameLen ← nameLen+1;
            [] ← RopeReader.Get[ropeReader];
            [] ← GetLooks[];
            };
        ENDLOOP;
      nameList ← TextNode.pZone.CONS[Rope.Substr[patternRope,nameStart,nameLen],nameList];
      nameLooksList ← TextNode.pZone.CONS[nameLooks,nameLooksList];
      EXITS BadName => ERROR MalformedPattern[missingNameEnd]
      };
    '> => {
      IF ~insideNamedPat THEN ERROR MalformedPattern[unmatchedNameEnd];
      insideNamedPat ← FALSE;
      patternArray[psIndex] ← [endname[nameCount]];
      nameCount ← nameCount+1;
      };
    '{ => {
      leftBracketSeen ← TRUE;
      patternArray[psIndex] ← [pattern[TextFindPrivate.leftBracketPattern]];
      };
    '} => {
      IF rightBracketSeen THEN
        patternArray[psIndex] ← [pattern[TextFindPrivate.nopPattern]]
        -- use first } in pattern
      ELSE {
        rightBracketSeen ← TRUE;
        patternArray[psIndex] ← [pattern[TextFindPrivate.rightBracketPattern]]
        }
      };
    ENDCASE => {
      -- any other char matches itself exactly
      -- The two tests are siblings, as in LitChar; the braces of this arm were
      -- unbalanced, which nested the first-char test inside the last-char test.
      patternArray[psIndex] ← [pattern[char]];
      IF psIndex = patternLength-1 THEN {
        lastPatternCharIsNormal ← TRUE;
        lastPatChar1 ← char;
        };
      IF psIndex = 0 THEN {
        firstPatternCharIsNormal ← TRUE;
        firstPatChar1 ← char;
        }
      };
  }; -- end of PatChar
-- Main body of CreateFromParts: choose the search mode, unpack the pattern into
-- patternArray, then allocate the stacks and the name table the finder needs.
IF word THEN wordSearch ← TRUE
  -- so Try will know to make sure don't have adjacent alphanumerics
  -- (this remark had lost its comment marker and was being read as code)
ELSE IF patternLength=2
    AND ~literal
    AND ~ignoreLooks
    AND Rope.Fetch[patternRope,patternStart]='#
    AND Rope.Fetch[patternRope,patternStart+1]='*
  THEN {
    -- for looks-only searches: the pattern is exactly #* and looks are significant,
    -- so only the looks of the first pattern char are kept and no pattern array is built
    looks ← TextLooks.FetchLooks[patternRuns,patternStart];
    looksOnly ← TRUE;
    runReader ← RunReader.Create[];
    RETURN;
    };
patternArray ← TextNode.pZone.NEW[TextFindPrivate.PatternArray[patternLength]];
ropeReader ← RopeReader.Create[];
RopeReader.SetPosition[ropeReader,patternRope,patternStart];
IF patternRuns # NIL AND ~ignoreLooks THEN {
  -- looks are significant: read them in step with the chars
  lksReader ← LooksReader.Create[];
  LooksReader.SetPosition[lksReader,patternRuns,patternStart];
  };
psIndex ← 0;
-- unpack the pattern
-- Stop at the end of the requested range, patternStart+plen, and not merely at the
-- end of the rope: patternArray was sized from the range, so reading any chars
-- beyond it would index past the end of the array.
UNTIL RopeReader.GetIndex[ropeReader] >= patternStart+plen DO
  char: CHAR ← RopeReader.Get[ropeReader ! RopeReader.ReadOffEnd => EXIT];
  patProc[char,GetLooks[],ignoreCase];
  psIndex ← psIndex + 1;
  ENDLOOP;
IF insideNamedPat THEN ERROR MalformedPattern[missingNameEnd]; -- mfp
length ← patternLength;
IF stackSize > 0 THEN {
  -- one extra slot is added to the count gathered while unpacking
  stackSize ← stackSize+1;
  textPosStack ← TextNode.pZone.NEW[TextFindPrivate.TextStackArray[stackSize]];
  textLenStack ← TextNode.pZone.NEW[TextFindPrivate.TextStackArray[stackSize]];
  patternPosStack ← TextNode.pZone.NEW[TextFindPrivate.PatternStackArray[stackSize]];
  };
IF nameList # NIL THEN {
  -- the lists were built in reverse order of appearance, so fill the table from the top down
  nameArray ← TextNode.pZone.NEW[TextFindPrivate.NameArray[nameCount]];
  FOR i: NAT DECREASING IN [0..nameCount) DO
    nameArray[i].name ← nameList.first;
    nameArray[i].looks ← nameLooksList.first;
    nameList ← nameList.rest;
    nameLooksList ← nameLooksList.rest;
    ENDLOOP;
  };
}; -- of OPEN finder
}; -- of CreateFromParts
-- Start: exported procedure with an empty body; calling it has no effect in this module.
Start: PUBLIC PROC = {};
}.