RegExpFind2Impl.mesa
Copyright © 1985 by Xerox Corporation. All rights reserved.
Nix, December 21, 1983 12:45 pm
Russ Atkinson (RRA) April 25, 1985 5:29:29 am PST
Peter Kessler September 30, 1986 10:54:52 am PDT
DIRECTORY
Ascii USING [Upper],
Basics USING [HighHalf, LongNumber, LowHalf],
LooksReader USING [Get, SetPosition, Ref],
RegularExpression USING [CharClass, ClassArray, FinderRecord, Index, LegalCharacters, OpCode, PatternDataArray, PatternLooksArray, PatternNextArray, PatternOpCodeArray, PatternStackArray, ReturnCode, ReturnCodeArray, Search, TextStackArray],
Rope USING [Fetch, ROPE, Size],
RopeEdit USING [GetCharProp],
RopeReader USING [Get, GetIndex, Ref, SetPosition],
RunReader USING [Ref],
TextEdit USING [GetRope, GetRuns],
TextLooks USING [Looks, Runs],
TextLooksSupport USING [LooksAND],
TextNode USING [Offset, RefTextNode];
RegExpFind2Impl: CEDAR PROGRAM
IMPORTS Ascii, Basics, LooksReader, RegularExpression, Rope, RopeEdit, RopeReader, TextEdit, TextLooksSupport
EXPORTS RegularExpression = {
OPEN RegularExpression;
Finder: TYPE = REF FinderRec;
FinderRec: PUBLIC TYPE = FinderRecord;
ROPE: TYPE = Rope.ROPE;
Offset: TYPE = TextNode.Offset;
RefTextNode: TYPE = TextNode.RefTextNode;
SearchRope: PUBLIC PROC [finder: Finder, rope: ROPE, start: Offset, len: Offset, interrupt: REF BOOL] RETURNS [found: BOOL, at, atEnd, before, after: Offset] = {
[found, at, atEnd, before, after] ← Search[finder, rope, NIL, start, len, FALSE, interrupt]
};
Try: PUBLIC PROC [finder: Finder, text: RefTextNode, start: Offset, len: Offset, looksExact: BOOL, interrupt: REF BOOL] RETURNS [found: BOOL, at, atEnd, before, after: Offset] = {
[found, at, atEnd, before, after] ←
Search[finder, TextEdit.GetRope[text], TextEdit.GetRuns[text], start, len, looksExact, interrupt];
};
Search: PUBLIC PROC [finder: Finder, rope: ROPE, runs: TextLooks.Runs, start: Offset, len: Offset, looksExact: BOOLEAN, interrupt: REF BOOLNIL] RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = {
IF finder.wordSearch THEN DO -- repeat search until find a word
[found, at, atEnd, before, after] ← TryToFind[finder, rope, runs, start, len, looksExact, interrupt];
IF ~found OR (interrupt#NIL AND interrupt^) THEN RETURN; -- failed
IF IsWord[rope, at, atEnd] THEN RETURN; -- got it
start ← after; -- try again
ENDLOOP;
[found, at, atEnd, before, after] ←
TryToFind[finder, rope, runs, start, len, looksExact, interrupt];
};
IsWord: PROC [rope: ROPE, at, atEnd: Offset] RETURNS [BOOLEAN] = {
IF at > 0 AND
RopeEdit.GetCharProp[Rope.Fetch[rope,at-1]] = alphaNumeric THEN RETURN [FALSE];
IF atEnd < Rope.Size[rope] AND
RopeEdit.GetCharProp[Rope.Fetch[rope,atEnd]] = alphaNumeric THEN RETURN [FALSE];
RETURN [TRUE];
};
TryToFind: PROC [finder: Finder, rope: ROPE, runs: TextLooks.Runs, start: Offset, len: Offset, looksExact: BOOLEAN, interrupt: REF BOOL] RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = {
Character: TYPE = Index;
currentChar: Character;
currentCHAR: CHAR;
end: Offset ← start+MIN[len, Rope.Size[rope]-start];
nextPos: Offset ← start;
NoMoreChars: Character = 666;
NoNodeBreakBound: Index = 32177;
pc: Index ← 0;
nodeBreakBound: Index ← NoNodeBreakBound;
opCode: REF PatternOpCodeArray ← finder.forwardProgram.opCodes;
looks: REF PatternLooksArray ← finder.forwardProgram.looks;
data: REF PatternDataArray ← finder.forwardProgram.data;
next: REF PatternNextArray ← finder.forwardProgram.next;
pcStack: REF PatternStackArray ← finder.stack.pc;
nextPosStack: REF TextStackArray ← finder.stack.nextPos;
returnCodeStack: REF ReturnCodeArray ← finder.stack.returnCode;
charClass: REF ClassArray ← finder.classes;
stackPos: Index ← 0;
ropeReader: RopeReader.Ref ← finder.ropeReader;
lksReader: LooksReader.Ref ← finder.lksReader;
runReader: RunReader.Ref ← finder.runReader;
AdvanceChar: PROC [] = {
IF nextPos < end
THEN {
currentChar ← (currentCHAR ← RopeReader.Get[ropeReader]) - 0C;
nextPos ← nextPos + 1;
}
ELSE {
currentChar ← NoMoreChars;
currentCHAR ← '\000;
nextPos ← end + 1;
};
};
PushPos: PROC [returnCode: ReturnCode] = {
pcStack[stackPos] ← pc;
nextPosStack[stackPos] ← nextPos;
returnCodeStack[stackPos] ← returnCode;
stackPos ← stackPos + 1;
};
TestLooks: PROC [] RETURNS [BOOL] = {
sourcelks: TextLooks.Looks;
IF runs=NIL THEN RETURN [FALSE]; -- pattern has looks and text doesn't
IF nextPos NOT IN (start..end] THEN RETURN [FALSE]; -- boundary char has no looks
LooksReader.SetPosition[lksReader, runs, nextPos-1];
sourcelks ← LooksReader.Get[lksReader];
RETURN[
looks[pc]=(IF looksExact THEN sourcelks ELSE TextLooksSupport.LooksAND[sourcelks, looks[pc]])];
};
Begin: PROC [] = {
IF start < end THEN RopeReader.SetPosition[ropeReader, rope, start];
AdvanceChar[];
};
DoFieldEquals: PROC [ignoreCase: BOOL] RETURNS [BOOL] = {
pos: Offset ← finder.nameArray[data[pc]].at;
WHILE pos < finder.nameArray[data[pc]].atEnd DO
c: CHAR ← rope.Fetch[pos];
IF ignoreCase
THEN {
IF Ascii.Upper[c] # Ascii.Upper[currentCHAR] THEN RETURN [FALSE];
}
ELSE
IF c # currentCHAR THEN RETURN [FALSE];
pos ← pos + 1;
AdvanceChar[];
ENDLOOP;
RETURN[TRUE];
};
DoFieldEqualsLooks: PROC [ignoreCase: BOOL] RETURNS [BOOL] = {
pos: Offset ← finder.nameArray[data[pc]].at;
IF runs=NIL THEN RETURN [FALSE]; -- pattern has looks and text doesn't
WHILE pos < finder.nameArray[data[pc]].atEnd DO
c: CHAR ← rope.Fetch[pos];
fieldLooks, nextLooks: TextLooks.Looks;
IF ignoreCase
THEN {
IF Ascii.Upper[c] # Ascii.Upper[currentCHAR] THEN RETURN [FALSE];
}
ELSE
IF c # currentCHAR THEN RETURN [FALSE];
IF nextPos NOT IN (start..end] THEN RETURN [FALSE];
LooksReader.SetPosition[lksReader, runs, pos];
fieldLooks ← LooksReader.Get[lksReader];
LooksReader.SetPosition[lksReader, runs, nextPos-1];
nextLooks ← LooksReader.Get[lksReader];
IF fieldLooks # (IF looksExact THEN nextLooks ELSE TextLooksSupport.LooksAND[fieldLooks, nextLooks]) THEN RETURN[FALSE];
pos ← pos + 1;
AdvanceChar[];
ENDLOOP;
RETURN[TRUE];
};
DoCarefulGreedyClosureEnd: PROC [] RETURNS [BOOL] = {
lastCarefulNextPos: Basics.LongNumber;
lastCarefulNextPos.lowbits ← data[next[data[pc]]+1];
lastCarefulNextPos.highbits ← next[next[data[pc]]+1];
TRUSTED {
RETURN[lastCarefulNextPos # LOOPHOLE[nextPos]];
};
};
Begin[];
DO
DO
{
SELECT opCode[pc] FROM
matchChar =>
IF currentChar = data[pc] THEN GO TO advance ELSE GO TO Failure;
matchCharIC =>
IF Ascii.Upper[currentCHAR] = VAL[data[pc]]
THEN
GO TO advance ELSE GO TO Failure;
matchCharLooks =>
IF currentChar = data[pc] AND TestLooks[]
THEN GO
TO advance ELSE GO TO Failure;
matchCharLooksIC =>
IF Ascii.Upper[currentCHAR] = VAL[data[pc]] AND TestLooks[]
THEN GO TO advance ELSE GO TO Failure;
matchClass =>
IF currentCHAR IN LegalCharacters AND charClass[data[pc]][currentCHAR]
THEN GO TO advance
ELSE GO TO Failure;
matchClassLooks =>
IF currentCHAR IN LegalCharacters AND
charClass[data[pc]][currentCHAR] AND
TestLooks[]
THEN GO TO advance ELSE GO TO Failure;
matchAnyChar =>
IF currentChar # NoMoreChars THEN GO TO advance ELSE GO TO Failure;
matchAnyCharLooks =>
IF currentChar # NoMoreChars AND TestLooks[]
THEN GO
TO advance ELSE GO TO Failure;
matchNodeBreak =>
IF ~(nextPos=Rope.Size[rope]+1 AND currentChar=NoMoreChars)
THEN GO TO Failure;
skipToNodeBreak => {
IF end < Rope.Size[rope] THEN GO TO Failure;
nextPos ← end;
RopeReader.SetPosition[ropeReader, rope, nextPos];
GO TO advance;
};
skipToNodeBreakLooks => {
IF end < Rope.Size[rope] THEN GO TO Failure;
WHILE currentChar # NoMoreChars DO
IF ~TestLooks[] THEN GO TO Failure;
AdvanceChar[];
ENDLOOP;
};
matchBeginNode => IF nextPos # 1 THEN GO TO Failure;
fail => GO TO Failure;
succeed => GO TO AbsoluteSuccess;
noOp => NULL;
skipOverClass => {
ccl: CharClass ← charClass[data[pc]];
WHILE currentCHAR IN LegalCharacters AND ccl[currentCHAR] DO
AdvanceChar[];
ENDLOOP;
GO TO failAtEnd;
};
skipOverClassLooks => {
ccl: CharClass ← charClass[data[pc]];
WHILE currentCHAR IN LegalCharacters AND ccl[currentCHAR] AND TestLooks[] DO
AdvanceChar[];
ENDLOOP;
GO TO failAtEnd;
};
skipOverChar => {
c: CHARVAL[data[pc]];
WHILE currentCHAR = c DO
AdvanceChar[];
ENDLOOP;
GO TO failAtEnd;
};
skipOverCharLooks => {
c: CHARVAL[data[pc]];
WHILE currentCHAR = c AND TestLooks[] DO
AdvanceChar[];
ENDLOOP;
GO TO failAtEnd;
};
skipOverCharIC => {
c: CHARVAL[data[pc]];
WHILE Ascii.Upper[currentCHAR] = c DO
AdvanceChar[];
ENDLOOP;
GO TO failAtEnd;
};
skipOverCharLooksIC => {
c: CHARVAL[data[pc]];
WHILE Ascii.Upper[currentCHAR] = c AND TestLooks[] DO
AdvanceChar[];
ENDLOOP;
GO TO failAtEnd;
};
skipToChar => {
c: CHARVAL[data[pc]];
UNTIL currentCHAR = c DO
IF currentChar = NoMoreChars THEN GO TO Failure;
AdvanceChar[];
ENDLOOP;
};
skipToCharLooks => {
c: CHARVAL[data[pc]];
UNTIL currentCHAR = c AND TestLooks[] DO
IF currentChar = NoMoreChars THEN GO TO Failure;
AdvanceChar[];
ENDLOOP;
};
skipToCharIC => {
c: CHARVAL[data[pc]];
UNTIL Ascii.Upper[currentCHAR] = c DO
IF currentChar = NoMoreChars THEN GO TO Failure;
AdvanceChar[];
ENDLOOP;
};
skipToCharLooksIC => {
c: CHARVAL[data[pc]];
UNTIL Ascii.Upper[currentCHAR] = c AND TestLooks[] DO
IF currentChar = NoMoreChars THEN GO TO Failure;
AdvanceChar[];
ENDLOOP;
};
skipToString => {
PushPos[skipToStringRet];
};
endOfSkipToString => {
stackPos ← stackPos-1;
nextPos ← nextPosStack[stackPos]-1;
RopeReader.SetPosition[ropeReader, rope, nextPos];
GO TO advance;
};
skipToEnd => {
nextPos ← end;
GO TO advance;
};
skipToBeginning => {
nextPos ← start;
GO TO advance;
};
closure, greedyClosure, alt =>
PushPos[closureRet];
carefulClosure => {
PushPos[closureRet];
data[data[pc]+1] ← Basics.LowHalf[nextPos];
next[data[pc]+1] ← Basics.HighHalf[nextPos];
};
carefulClosure, carefulGreedyClosure => {
PushPos[closureRet];
data[next[pc]+1] ← Basics.LowHalf[nextPos];
next[next[pc]+1] ← Basics.HighHalf[nextPos];
};
carefulClosureEnd => {
lastCarefulNextPos: Basics.LongNumber;
lastCarefulNextPos.lowbits ← data[data[data[pc]]+1];
lastCarefulNextPos.highbits ← next[data[data[pc]]+1];
IF lastCarefulNextPos = LOOPHOLE[nextPos] THEN GO TO Failure;
};
carefulGreedyClosureEnd => IF ~DoCarefulGreedyClosureEnd[] THEN GO TO Failure;
fieldStart => finder.nameArray[data[pc]].at ← nextPos-1;
fieldEnd => finder.nameArray[data[pc]].atEnd ← MAX[nextPos-1, finder.nameArray[data[pc]].at];
boundNodeBreaks => nodeBreakBound ← data[pc];
fieldEquals => IF ~DoFieldEquals[FALSE] THEN GO TO Failure;
fieldEqualsLooks => IF ~DoFieldEqualsLooks[FALSE] THEN GO TO Failure;
fieldEqualsIC => IF ~DoFieldEquals[TRUE] THEN GO TO Failure;
fieldEqualsLooksIC => IF ~DoFieldEqualsLooks[TRUE] THEN GO TO Failure;
beginAll => before ← nextPos-1;
endAll => {
after ← MAX[nextPos-1, before];
GO TO AbsoluteSuccess;
};
ENDCASE => ERROR;
EXITS
failAtEnd => IF currentChar = NoMoreChars THEN GO TO Failure;
advance => AdvanceChar[];
};
pc ← next[pc];
REPEAT
Failure => {
IF interrupt # NIL AND interrupt^ THEN GO TO AbsoluteFailure;
IF stackPos = 0 THEN GO TO AbsoluteFailure;
stackPos ← stackPos - 1;
SELECT returnCodeStack[stackPos] FROM
skipToStringRet => {
pc ← pcStack[stackPos];
nextPos ← nextPosStack[stackPos];
};
closureRet, greedyClosureRet, altRet, carefulClosureRet, carefulGreedyClosureRet => {
pc ← data[pcStack[stackPos]];
nextPos ← nextPosStack[stackPos]-1;
};
ENDCASE => ERROR;
RopeReader.SetPosition[ropeReader, rope, nextPos];
AdvanceChar[];
};
ENDLOOP;
REPEAT
AbsoluteFailure => {
found ← FALSE;
};
AbsoluteSuccess => {
found ← TRUE;
at ← finder.nameArray[0].at;
atEnd ← finder.nameArray[0].atEnd;
};
ENDLOOP;
};
SearchRopeBackwards: PUBLIC PROC [finder: Finder, rope: ROPE, start: Offset, len: Offset, interrupt: REF BOOLNIL] RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = {
[found, at, atEnd, before, after] ←
RegularExpression.Search[finder, rope, NIL, start, len, FALSE, interrupt];
};
TryBackwards: PUBLIC PROC [finder: Finder, text: RefTextNode, start: Offset, len: Offset, looksExact: BOOLEANFALSE, interrupt: REF BOOLNIL] RETURNS [found: BOOLEAN, at, atEnd, before, after: Offset] = {
[found, at, atEnd, before, after] ←
RegularExpression.Search[finder, TextEdit.GetRope[text], TextEdit.GetRuns[text],
start, len, looksExact, interrupt];
};
}.