[Cyan]<CedarChest6.1>RegularExpression>RegExpFindCompileImpl.mesa!1

RegExpFindCompileImpl.mesa

Nix, December 21, 1983 6:02 pm

Russ Atkinson (RRA) April 25, 1985 5:21:12 am PST

DIRECTORY

TextLooks USING [Looks],

RegularExpression USING [ParseTree, Code, CodeContent, PatternOpCodeArray, PatternDataArray, PatternNextArray, PatternLooksArray, IgnoreLooks, ParseTreeContent, OpCode, Index, MalformedPattern];

RegExpFindCompileImpl: CEDAR PROGRAM

IMPORTS RegularExpression

EXPORTS RegularExpression = {

OPEN RegularExpression;

Compile: PUBLIC PROC [p: ParseTree] RETURNS [code: Code] = {

CompileIt: PROC [p: ParseTree, self, next: Index] = {

WITH p SELECT FROM

x: REF ParseTreeContent.concat => {

FOR l: LIST OF ParseTree ← x.concats, l.rest UNTIL l = NIL DO

IF l.rest = NIL

THEN

CompileIt[l.first, self, next]

ELSE {

middle: Index ← Alloc[];

CompileIt[l.first, self, middle];

self ← middle;

};

ENDLOOP;

};

x: REF ParseTreeContent.char =>

Set[self, IF x.looks = IgnoreLooks THEN matchChar ELSE matchCharLooks, x.looks, ORD[x.char], next];

x: REF ParseTreeContent.charIC =>

Set[self, IF x.looks = IgnoreLooks THEN matchCharIC ELSE matchCharLooksIC, x.looks, ORD[x.char], next];

x: REF ParseTreeContent.class =>

Set[self, IF x.looks = IgnoreLooks THEN matchClass ELSE matchClassLooks, x.looks, x.classNumber, next];

x: REF ParseTreeContent.anyChar =>

Set[self, IF x.looks = IgnoreLooks THEN matchAnyChar ELSE matchAnyCharLooks, x.looks, notUsed, next];

x: REF ParseTreeContent.nodeBreak =>

Set[self, matchNodeBreak, IgnoreLooks, notUsed, next];

x: REF ParseTreeContent.skipToNodeBreak =>

Set[self, IF x.looks = IgnoreLooks THEN skipToNodeBreak ELSE skipToNodeBreakLooks, x.looks, notUsed, next];

x: REF ParseTreeContent.beginNode =>

Set[self, matchBeginNode, IgnoreLooks, notUsed, next];

x: REF ParseTreeContent.fail =>

Set[self, fail, IgnoreLooks, notUsed, next];

x: REF ParseTreeContent.succeed =>

Set[self, succeed, IgnoreLooks, notUsed, next];

x: REF ParseTreeContent.noOp =>

Set[self, noOp, IgnoreLooks, notUsed, next];

x: REF ParseTreeContent.skipOverClass =>

Set[self, IF x.looks = IgnoreLooks THEN skipOverClass ELSE skipOverClassLooks, x.looks, x.classNumber, next];

x: REF ParseTreeContent.skipOverChar =>

Set[self, IF x.looks = IgnoreLooks THEN skipOverChar ELSE skipOverCharLooks, x.looks, ORD[x.char], next];

x: REF ParseTreeContent.skipOverCharIC =>

Set[self, IF x.looks = IgnoreLooks THEN skipOverCharIC ELSE skipOverCharLooksIC, x.looks, ORD[x.char], next];

x: REF ParseTreeContent.skipOver =>

-- ERROR RegularExpression.MalformedPattern[invalidNot,0];

ERROR RegularExpression.MalformedPattern[toobig];

x: REF ParseTreeContent.skipToChar =>

Set[self, IF x.looks = IgnoreLooks THEN skipToChar ELSE skipToCharLooks, x.looks, ORD[x.char], next];

x: REF ParseTreeContent.skipToCharIC =>

Set[self, IF x.looks = IgnoreLooks THEN skipToCharIC ELSE skipToCharLooksIC, x.looks, ORD[x.char], next];

x: REF ParseTreeContent.skipToString => {

endOfStringInstructions: Index ← Alloc[];

stringInstructions: Index ← Alloc[];

Set[endOfStringInstructions, endOfSkipToString, IgnoreLooks, notUsed, next];

Set[self, skipToString, IgnoreLooks, notUsed, stringInstructions];

CompileIt[x.p, stringInstructions, endOfStringInstructions];

};

x: REF ParseTreeContent.skipTo =>

-- ERROR RegularExpression.MalformedPattern[invalidNot,0];

ERROR RegularExpression.MalformedPattern[toobig];

x: REF ParseTreeContent.skipToEnd =>

Set[self, skipToEnd, IgnoreLooks, notUsed, next];

x: REF ParseTreeContent.skipToBeginning =>

Set[self, skipToBeginning, IgnoreLooks, notUsed, next];

x: REF ParseTreeContent.closure => {

body: Index ← Alloc[];

Set[self, closure, IgnoreLooks, body, next];

CompileIt[x.p, body, self];

};

x: REF ParseTreeContent.carefulClosure => {

body: Index ← Alloc[];

bodyPlusOne: Index ← Alloc[];

check: Index ← Alloc[];

Set[self, carefulClosure, IgnoreLooks, body, next];

Set[check, carefulClosureEnd, IgnoreLooks, self, self];

CompileIt[x.p, body, check];

};

x: REF ParseTreeContent.greedyClosure => {

body: Index ← Alloc[];

Set[self, greedyClosure, IgnoreLooks, next, body];

CompileIt[x.p, body, self];

};

x: REF ParseTreeContent.carefulGreedyClosure => {

body: Index ← Alloc[];

bodyPlusOne: Index ← Alloc[];

check: Index ← Alloc[];

Set[self, carefulGreedyClosure, IgnoreLooks, next, body];

Set[check, carefulGreedyClosureEnd, IgnoreLooks, self, self];

CompileIt[x.p, body, check];

};

x: REF ParseTreeContent.alt => {

FOR l: LIST OF ParseTree ← x.alts, l.rest UNTIL l = NIL DO

IF l.rest = NIL

THEN

CompileIt[l.first, self, next]

ELSE {

instr: Index ← Alloc[];

nextAlt: Index ← Alloc[];

Set[self, alt, IgnoreLooks, nextAlt, instr];

CompileIt[l.first, instr, next];

self ← nextAlt;

};

ENDLOOP;

};

x: REF ParseTreeContent.field => {

instr: Index ← Alloc[];

endInstr: Index ← Alloc[];

Set[self, fieldStart, IgnoreLooks, x.number, instr];

IF x.bound # -1 THEN {

extraInstr: Index ← Alloc[];

Set[instr, boundNodeBreaks, IgnoreLooks, x.bound, extraInstr];

instr ← extraInstr;

};

Set[endInstr, fieldEnd, IgnoreLooks, x.number, next];

CompileIt[x.p, instr, endInstr];

};

x: REF ParseTreeContent.fieldEquals =>

Set[self, fieldEquals, IgnoreLooks, x.number, next];

x: REF ParseTreeContent.fieldEqualsIC =>

Set[self, fieldEqualsIC, IgnoreLooks, x.number, next];

x: REF ParseTreeContent.beginALL =>

Set[self, fieldStart, IgnoreLooks, 0, next];

x: REF ParseTreeContent.endALL =>

Set[self, fieldEnd, IgnoreLooks, 0, next];

x: REF ParseTreeContent.beginAll =>

Set[self, beginAll, IgnoreLooks, 0, next];

x: REF ParseTreeContent.endAll =>

Set[self, endAll, IgnoreLooks, 0, next];

ENDCASE => ERROR;

};

codePC: Index ← 0;

codeSize: Index ← 0;

StandardSizeIncrement: Index = 100;

notUsed: NAT = 0;

Alloc: PROC [] RETURNS [pc: Index] = {

pc ← codePC;

IF pc >= codeSize THEN {

newCodeSize: INT ← codeSize*2 + StandardSizeIncrement;

newCode: Code ← NEW[CodeContent ← [

NEW[PatternOpCodeArray[newCodeSize]],

NEW[PatternLooksArray[newCodeSize]],

NEW[PatternDataArray[newCodeSize]],

NEW[PatternNextArray[newCodeSize]]]];

IF codeSize > 0 THEN

FOR i: Index IN [0..pc) DO

newCode.opCodes[i] ← code.opCodes[i];

newCode.looks[i] ← code.looks[i];

newCode.data[i] ← code.data[i];

newCode.next[i] ← code.next[i];

ENDLOOP;

code ← newCode;

codeSize ← newCodeSize;

};

codePC ← codePC+1;

};

Set: PROC [pc: Index, opCode: OpCode, looks: TextLooks.Looks, data: NAT, next: Index] = {

code.opCodes[pc] ← opCode;

code.looks[pc] ← looks;

code.data[pc] ← data;

code.next[pc] ← next;

};

first: Index ← Alloc[];

CompileIt[p, first, 0];

};