RegExpFindCompileImpl.mesa
Copyright © 1985 by Xerox Corporation. All rights reserved.
Nix, December 21, 1983 6:02 pm
Russ Atkinson (RRA) April 25, 1985 5:21:12 am PST
DIRECTORY
TextLooks USING [Looks],
RegularExpression USING [ParseTree, Code, CodeContent, PatternOpCodeArray, PatternDataArray, PatternNextArray, PatternLooksArray, IgnoreLooks, ParseTreeContent, OpCode, Index, MalformedPattern];
RegExpFindCompileImpl: CEDAR PROGRAM
IMPORTS RegularExpression
EXPORTS RegularExpression = {
OPEN RegularExpression;
Compile: PUBLIC PROC [p: ParseTree] RETURNS [code: Code] = {
CompileIt: PROC [p: ParseTree, self, next: Index] = {
WITH p SELECT FROM
x: REF ParseTreeContent.concat => {
FOR l: LIST OF ParseTree ← x.concats, l.rest UNTIL l = NIL DO
IF l.rest = NIL
THEN
CompileIt[l.first, self, next]
ELSE {
middle: Index ← Alloc[];
CompileIt[l.first, self, middle];
self ← middle;
};
ENDLOOP;
};
x: REF ParseTreeContent.char =>
Set[self, IF x.looks = IgnoreLooks THEN matchChar ELSE matchCharLooks, x.looks, ORD[x.char], next];
x: REF ParseTreeContent.charIC =>
Set[self, IF x.looks = IgnoreLooks THEN matchCharIC ELSE matchCharLooksIC, x.looks, ORD[x.char], next];
x: REF ParseTreeContent.class =>
Set[self, IF x.looks = IgnoreLooks THEN matchClass ELSE matchClassLooks, x.looks, x.classNumber, next];
x: REF ParseTreeContent.anyChar =>
Set[self, IF x.looks = IgnoreLooks THEN matchAnyChar ELSE matchAnyCharLooks, x.looks, notUsed, next];
x: REF ParseTreeContent.nodeBreak =>
Set[self, matchNodeBreak, IgnoreLooks, notUsed, next];
x: REF ParseTreeContent.skipToNodeBreak =>
Set[self, IF x.looks = IgnoreLooks THEN skipToNodeBreak ELSE skipToNodeBreakLooks, x.looks, notUsed, next];
x: REF ParseTreeContent.beginNode =>
Set[self, matchBeginNode, IgnoreLooks, notUsed, next];
x: REF ParseTreeContent.fail =>
Set[self, fail, IgnoreLooks, notUsed, next];
x: REF ParseTreeContent.succeed =>
Set[self, succeed, IgnoreLooks, notUsed, next];
x: REF ParseTreeContent.noOp =>
Set[self, noOp, IgnoreLooks, notUsed, next];
x: REF ParseTreeContent.skipOverClass =>
Set[self, IF x.looks = IgnoreLooks THEN skipOverClass ELSE skipOverClassLooks, x.looks, x.classNumber, next];
x: REF ParseTreeContent.skipOverChar =>
Set[self, IF x.looks = IgnoreLooks THEN skipOverChar ELSE skipOverCharLooks, x.looks, ORD[x.char], next];
x: REF ParseTreeContent.skipOverCharIC =>
Set[self, IF x.looks = IgnoreLooks THEN skipOverCharIC ELSE skipOverCharLooksIC, x.looks, ORD[x.char], next];
x: REF ParseTreeContent.skipOver =>
-- ERROR RegularExpression.MalformedPattern[invalidNot,0];
ERROR RegularExpression.MalformedPattern[toobig];
x: REF ParseTreeContent.skipToChar =>
Set[self, IF x.looks = IgnoreLooks THEN skipToChar ELSE skipToCharLooks, x.looks, ORD[x.char], next];
x: REF ParseTreeContent.skipToCharIC =>
Set[self, IF x.looks = IgnoreLooks THEN skipToCharIC ELSE skipToCharLooksIC, x.looks, ORD[x.char], next];
x: REF ParseTreeContent.skipToString => {
endOfStringInstructions: Index ← Alloc[];
stringInstructions: Index ← Alloc[];
Set[endOfStringInstructions, endOfSkipToString, IgnoreLooks, notUsed, next];
Set[self, skipToString, IgnoreLooks, notUsed, stringInstructions];
CompileIt[x.p, stringInstructions, endOfStringInstructions];
};
x: REF ParseTreeContent.skipTo =>
-- ERROR RegularExpression.MalformedPattern[invalidNot,0];
ERROR RegularExpression.MalformedPattern[toobig];
x: REF ParseTreeContent.skipToEnd =>
Set[self, skipToEnd, IgnoreLooks, notUsed, next];
x: REF ParseTreeContent.skipToBeginning =>
Set[self, skipToBeginning, IgnoreLooks, notUsed, next];
x: REF ParseTreeContent.closure => {
body: Index ← Alloc[];
Set[self, closure, IgnoreLooks, body, next];
CompileIt[x.p, body, self];
};
x: REF ParseTreeContent.carefulClosure => {
body: Index ← Alloc[];
bodyPlusOne: Index ← Alloc[];
check: Index ← Alloc[];
Set[self, carefulClosure, IgnoreLooks, body, next];
Set[check, carefulClosureEnd, IgnoreLooks, self, self];
CompileIt[x.p, body, check];
};
x: REF ParseTreeContent.greedyClosure => {
body: Index ← Alloc[];
Set[self, greedyClosure, IgnoreLooks, next, body];
CompileIt[x.p, body, self];
};
x: REF ParseTreeContent.carefulGreedyClosure => {
body: Index ← Alloc[];
bodyPlusOne: Index ← Alloc[];
check: Index ← Alloc[];
Set[self, carefulGreedyClosure, IgnoreLooks, next, body];
Set[check, carefulGreedyClosureEnd, IgnoreLooks, self, self];
CompileIt[x.p, body, check];
};
x: REF ParseTreeContent.alt => {
FOR l: LIST OF ParseTree ← x.alts, l.rest UNTIL l = NIL DO
IF l.rest = NIL
THEN
CompileIt[l.first, self, next]
ELSE {
instr: Index ← Alloc[];
nextAlt: Index ← Alloc[];
Set[self, alt, IgnoreLooks, nextAlt, instr];
CompileIt[l.first, instr, next];
self ← nextAlt;
};
ENDLOOP;
};
x: REF ParseTreeContent.field => {
instr: Index ← Alloc[];
endInstr: Index ← Alloc[];
Set[self, fieldStart, IgnoreLooks, x.number, instr];
IF x.bound # -1 THEN {
extraInstr: Index ← Alloc[];
Set[instr, boundNodeBreaks, IgnoreLooks, x.bound, extraInstr];
instr ← extraInstr;
};
Set[endInstr, fieldEnd, IgnoreLooks, x.number, next];
CompileIt[x.p, instr, endInstr];
};
x: REF ParseTreeContent.fieldEquals =>
Set[self, fieldEquals, IgnoreLooks, x.number, next];
x: REF ParseTreeContent.fieldEqualsIC =>
Set[self, fieldEqualsIC, IgnoreLooks, x.number, next];
x: REF ParseTreeContent.beginALL =>
Set[self, fieldStart, IgnoreLooks, 0, next];
x: REF ParseTreeContent.endALL =>
Set[self, fieldEnd, IgnoreLooks, 0, next];
x: REF ParseTreeContent.beginAll =>
Set[self, beginAll, IgnoreLooks, 0, next];
x: REF ParseTreeContent.endAll =>
Set[self, endAll, IgnoreLooks, 0, next];
ENDCASE => ERROR;
};
codePC: Index ← 0;
codeSize: Index ← 0;
StandardSizeIncrement: Index = 100;
notUsed: NAT = 0;
Alloc: PROC [] RETURNS [pc: Index] = {
pc ← codePC;
IF pc >= codeSize THEN {
newCodeSize: INT ← codeSize*2 + StandardSizeIncrement;
newCode: Code ← NEW[CodeContent ← [
NEW[PatternOpCodeArray[newCodeSize]],
NEW[PatternLooksArray[newCodeSize]],
NEW[PatternDataArray[newCodeSize]],
NEW[PatternNextArray[newCodeSize]]]];
IF codeSize > 0 THEN
FOR i: Index IN [0..pc) DO
newCode.opCodes[i] ← code.opCodes[i];
newCode.looks[i] ← code.looks[i];
newCode.data[i] ← code.data[i];
newCode.next[i] ← code.next[i];
ENDLOOP;
code ← newCode;
codeSize ← newCodeSize;
};
codePC ← codePC+1;
};
Set: PROC [pc: Index, opCode: OpCode, looks: TextLooks.Looks, data: NAT, next: Index] = {
code.opCodes[pc] ← opCode;
code.looks[pc] ← looks;
code.data[pc] ← data;
code.next[pc] ← next;
};
first: Index ← Alloc[];
CompileIt[p, first, 0];
};
}.