JunoParserImpl.mesa (was LexerImpl + ParseTableImpl + ParserImpl)
Copied from ParserImpl.mesa, last Edited by: GNelson, January 10, 1983 5:53 pm
Merged with Parser.mesa, last Edited by: GNelson, January 10, 1983 5:50 pm
Merged with ParseTableImpl.mesa, last Edited by GNelson, March 1, 1983 6:11 pm
Merged with ParseTable.mesa, last Edited by: GNelson, August 24, 1983 10:30 pm
Merged with LexerImpl.mesa, coded by GNelson (?), December 7, 1982 3:26 pm
Merged with Lexer.mesa, coded by by GNelson (?), September 9, 1982 4:13 pm

Last Edited by: Stolfi, June 15, 1984 8:07:29 am PDT

DIRECTORY

JunoParseUnparse,
Atom USING [MakeAtom, PutProp, GetProp, MakeAtomFromChar],
IO USING [CR, LF, SP, NUL, FF, TAB],
Rope USING [ROPE, Cat, Fetch, Substr, FromChar, Length];

JunoParserImpl
: CEDAR PROGRAM
IMPORTS
Rope, Atom
EXPORTS
JunoParseUnparse =
BEGIN OPEN JunoParseUnparse;
ROPE: TYPE = Rope.ROPE;

- - - - - LEXER/PARSER/UNPARSER TABLES

NewTable: PUBLIC PROC RETURNS [table: Table] =

BEGIN
table ← NEW [TableRep ←
[maxAtomLength: 200,
maxStringLength: 200,
chType: ALL [op],
twoCharOps: NIL,

defaultAtomProps: NEW [Props ←
[identifier: TRUE, unparseType: zero]]
]];

SetDefaultCharTypes[table]
END;

SetCharType: PUBLIC PROC[table: Table, from, to: CHAR, chType: CharType] =

BEGIN
FOR c: CHAR IN [from..to] DO table.chType[c] ← chType ENDLOOP
END;

SetDefaultCharTypes: PROC [table: Table] =

BEGIN OPEN table;
ops: Rope.ROPE = "+-*/|\\()[]{}<>=~@#$%&^←:;,.!?";
FOR c: CHAR IN [0C .. 255C] DO chType[c] ← other ENDLOOP;
FOR c: CHAR IN ['a .. 'z] DO chType[c] ← letter ENDLOOP;
FOR c: CHAR IN ['A .. 'Z] DO chType[c] ← letter ENDLOOP;
FOR c: CHAR IN ['0 .. '9] DO chType[c] ← digit ENDLOOP;
FOR i: INT IN [0..ops.Length[]) DO chType[ops.Fetch[i]] ← op ENDLOOP;
chType['"] ← quote;
chType[IO.CR] ← blank;
chType[IO.SP] ← blank;
chType[IO.LF] ← blank;
chType[IO.FF] ← blank;
chType[IO.NUL] ← blank;
chType[IO.TAB] ← blank
END;

EnterTwoCharOp: PUBLIC PROC [table: Table, c1, c2: CHAR] =

BEGIN
atom: ATOM = Atom.MakeAtom[Rope.Cat[Rope.FromChar[c1], Rope.FromChar[c2]]];
table.twoCharOps ← CONS[[chars: [c1, c2], atom: atom], table.twoCharOps]
END;

EnterAlias: PUBLIC PROC [table: Table, alias, standard: ATOM] =

BEGIN
Atom.PutProp[atom: alias, prop: table, val: standard]
END;

PropRec: TYPE = RECORD [name: ATOM, props: Props];

EnterAtomProps: PUBLIC PROC [table: Table, atom: ATOM, props: Props] =

{Atom.PutProp[atom: atom, prop: table, val: NEW [Props ← props]]};

GetAtomProps: PUBLIC PROC [atom: ATOM, table: Table]
RETURNS [name: ATOM, rp: REF Props] =

BEGIN
DO
ref: REF ← Atom.GetProp[atom: atom, prop: table];
IF ref=NIL THEN RETURN [atom, table.defaultAtomProps]; -- no special props defined
WITH ref SELECT FROM
p: REF Props => {RETURN [atom, p]}; -- properties
a: ATOM => {atom ← a}; -- alias
ENDCASE => ERROR;
ENDLOOP
END;

- - - - - LEXING

Exhausted: PUBLIC PROC [stream: Stream, table: Table] RETURNS [BOOL] =

{WHILE stream.pos < stream.len DO
IF table.chType[stream.rope.Fetch[stream.pos]] # blank THEN RETURN[FALSE];
stream.pos←stream.pos+1
ENDLOOP;
RETURN[TRUE]};

Lex: PUBLIC PROC [stream: Stream, table: Table]
RETURNS [lex: Lexeme, error: ROPE, rp: REF Props] =

BEGIN OPEN table;

EndOfStream: PROC RETURNS [BOOL] = INLINE
-- returns true if next char in lexer.stream is c

BEGIN
RETURN [stream.pos >= stream.len]
END;

CurChar: PROC RETURNS [c: CHAR] = INLINE
-- returns next char in lexer.stream

BEGIN
RETURN [stream.rope.Fetch[stream.pos]]
END;

SeeChar: PROC [c: CHAR] RETURNS [BOOL] = INLINE
-- returns true if next char in lexer.stream is c

BEGIN
RETURN [NOT EndOfStream[] AND CurChar[] = c]
END;

SeeType: PROC [t: CharType] RETURNS [BOOL] = INLINE
-- returns true if next char in lexer.stream is of type t

BEGIN
RETURN [NOT EndOfStream[] AND chType[CurChar[]] = t]
END;

Skip: PROC = INLINE
-- advances lexer.stream.

BEGIN
stream.pos ← stream.pos+1
END;

first: INT ← stream.pos; -- position of first char of current lexeme in lexer.stream

NextID: PROC =

BEGIN
atom: ATOM;
Skip[];
WHILE SeeType[letter] OR SeeType[digit] DO
IF stream.pos-first >= table.maxAtomLength THEN
{error ← "identifier too long"; RETURN};
Skip[]
ENDLOOP;
[lex, rp] ← GetAtomProps[Atom.MakeAtom
[Rope.Substr[stream.rope, first, stream.pos-first]], table]
END;

NextOp: PROC =

BEGIN
atom: ATOM;
c1: CHAR ← CurChar[]; Skip[];
-- Check if it is a two-character op
IF SeeType[op] THEN
{c2: CHAR = CurChar[];
FOR ops: LIST OF TwoCharOp ← table.twoCharOps, ops.rest UNTIL ops = NIL DO
IF ops.first.chars = [c1, c2] THEN
{atom ← ops.first.atom; Skip[]; EXIT}
ENDLOOP}
ELSE
{-- Nope - single char op
atom ← Atom.MakeAtomFromChar[c1]};
[lex, rp] ← GetAtomProps[atom, table] -- props should be non-NIL if table is OK
END;

NextNumber: PROC =

BEGIN
d: INT;
n: INT ← CurChar[] - '0; Skip[];
WHILE SeeType[digit] DO
d ← CurChar[] - '0;
IF n > (LAST[INT] - d)/10 THEN
{error ← "number too big"; RETURN};
n ← n * 10 + d;
Skip[]
ENDLOOP;
IF SeeChar['.] THEN -- get decimal part
{x: REAL ← n;
y: REAL ← 0.1;
Skip[];
WHILE SeeType[digit] DO
x ← x + y * (CurChar[] - '0);
y ← y/10;
Skip[]
ENDLOOP;
lex ← NEW[REAL ← x]}
ELSE
{lex ← NEW[INT ← n]}
END;

NextString: PROC =

BEGIN
delim: CHAR ← CurChar[] ; Skip[];
WHILE NOT EndOfStream[] AND CurChar[] # delim DO
IF CurChar[] = IO.CR OR CurChar[] = IO.LF
OR SeeType[invalid] THEN
{error ← "invalid character in string"; RETURN};
IF stream.pos-first >= table.maxStringLength THEN
{error ← "string too long"; RETURN};
Skip[]
ENDLOOP;
IF NOT SeeChar[delim] THEN
{error ← "runaway string"; RETURN};
lex ← Rope.Substr[stream.rope, first+1, stream.pos-first-1];
Skip[]
END;

error ← NIL; rp ← NIL; lex ← NIL;
SELECT table.chType[CurChar[]] FROM
letter => NextID[];
digit => NextNumber[];
op => NextOp[];
quote => NextString[];
ENDCASE => {error ← "invalid character"};
IF error # NIL THEN -- backup stream to beginning of parsed string
{stream.pos ← first; lex ← NIL; rp ← NIL}

END;

- - - - - PARSER

weakOp: REF Props ← NEW[Props ← [matchfix: TRUE, bindingPower: -1]];

s2: LIST OF REF ANYCONS[weakOp, NIL];
s1: LIST OF REF ANYCONS[s2, NIL];
s3: LIST OF REF ANYCONS[NIL, s1];

Parse: PUBLIC PROC [stream: Stream, table: Table]
RETURNS [expr: Se, error: ROPE, openCount: INTEGER] =

BEGIN

e: LIST OF REF ANY;

savepos: INT;

lex: Lexeme;
rp: REF Props;

qm: BOOLTRUE;

IsMatchfixOrSubfixOp: PROC [x: Lexeme] RETURNS [BOOL] = INLINE

{WITH x SELECT FROM
atom: ATOM =>
{rpx: REF Props = GetAtomProps[atom].rp;
RETURN [rpx.matchfix OR rpx.subfix]};
ENDCASE => RETURN[FALSE]};

Beats: PROC [x: Lexeme, rp: REF Props] RETURNS [BOOL] = INLINE

{WITH x SELECT FROM
atom: ATOM =>
{rpx: REF Props = GetAtomProps[atom].rpx;
RETURN [rpx.bindingPower > rp.bindingPower]};
ENDCASE => RETURN[FALSE]};

Retract: PROC = INLINE

{t: LIST OF REF ANY ← e.rest; e.rest ← NIL; e ← t};

e ← s3;
openCount ← 0;
e.first ← NIL;

WHILE NOT Exhausted [stream, table] DO
savepos ← stream.pos;
[lex, error, rp] ← Lex[stream, table];
IF error # NIL THEN EXIT;
SELECT TRUE FROM

qm AND rp = NIL
=> {e.first ← lex; qm ← FALSE};

qm AND rp # NIL AND (rp.prefix OR rp.matchfix)
=> {e.first ← CONS[lex, CONS[NIL, e]]; e ← NARROW[Cdar[e]]};

~ qm AND rp # NIL AND (rp.infix OR rp.subfix)
=> {WHILE Beats[Caadr[e], rp] AND ~ IsMatchfixOrSubfixOp[Caadr[e]]
DO Retract[] ENDLOOP;
e.first ← CONS[lex, CONS[e.first, CONS[NIL, e]]]; e ← NARROW[Cddar[e]];
qm ← TRUE};

~qm AND rp # NIL AND rp.postfix
=> {WHILE Beats[Caadr[e], rp] DO Retract[] ENDLOOP;
e.first ← CONS[lex, CONS[e.first, NIL]]};

~qm AND rp # NIL AND rp.closefix
=> {WHILE ~ IsMatchfixOrSubfixOp[Caadr[e]] DO Retract[] ENDLOOP;
IF Matches[Caadr[e], rp]
THEN Retract[]
ELSE {error ← "Parse Error"; EXIT}}

ENDCASE => EXIT
ENDLOOP;

Unthread right branch of tree:

WHILE Caadr[e] # weakOp DO
IF IsMatchfixOrSubfixOp[Caadr[e]] THEN openCount ← openCount + 1;
Retract[];
ENDLOOP;
IF error = NIL AND (qm OR openCount # 0) THEN {error ← "Parse error "};
expr ← s3.first

END;

- - - - CAR, CDR, ETC

Car: PUBLIC PROC [r: Se] RETURNS [Se] = {RETURN[NARROW[r, LIST OF REF ANY].first]};

Cdr: PUBLIC PROC [r: Se] RETURNS [Se] = {RETURN[NARROW[r, LIST OF REF ANY].rest]};

Cadr: PUBLIC PROC [r: Se] RETURNS [Se] = {RETURN[Car[Cdr[r]]]};

Caddr: PUBLIC PROC [r: Se] RETURNS [Se] = {RETURN[Car[Cdr[Cdr[r]]]]};

Cddr: PUBLIC PROC [r: Se] RETURNS [Se] = {RETURN[Cdr[Cdr[r]]]};

Caadr: PUBLIC PROC [r: Se] RETURNS [Se] = {RETURN[Car[Car[Cdr[r]]]]};

Cdar: PUBLIC PROC [r: Se] RETURNS [Se] = {RETURN[Cdr[Car[r]]]};

Cddar: PUBLIC PROC [r: Se] RETURNS [Se] = {RETURN[Cdr[Cdr[Car[r]]]]};

- - - - - SYNTAX CHECKER

Or: PUBLIC PROC
[v1, v2, v3, v4, v5: VerdictAndCulprit ← [ofCourseNot]] RETURNS [r: VerdictAndCulprit] =

{r ← [ofCourseNot];
IF v1.verdict = yes THEN RETURN [[yes]];
IF v1.verdict = no THEN r ← v1;
IF v2.verdict = yes THEN RETURN [[yes]];
IF v2.verdict = no THEN r ← v2;
IF v3.verdict = yes THEN RETURN [[yes]];
IF v3.verdict = no THEN r ← v3;
IF v4.verdict = yes THEN RETURN [[yes]];
IF v4.verdict = no THEN r ← v5;
IF v5.verdict = yes THEN RETURN [[yes]];
IF v5.verdict = no THEN r ← v5};

False: SyntacticPredicate = {RETURN [[ofCourseNot]]};

HasForm: PUBLIC PROC [f: Se, op: REF, Arg1: SyntacticPredicate, Arg2: SyntacticPredicate ← NIL]
RETURNS [VerdictAndCulprit] =

{WITH Car[f] SELECT FROM
g: LIST OF REF ANY =>
{IF NOT Is[g, op] THEN RETURN [[OfCourseNot, g]];
IF (Arg2 = NIL) # (Cddr[g] = NIL) THEN RETURN[[ofCourseNot, f]];
{v: VerdictAndCulprit ← Arg1[Cdr[g]];
IF v.verdict # yes THEN RETURN [[no, v.culprit]];
IF Cddr[g] = NIL THEN RETURN [[no, NIL]];
v ← Arg2[Cddr[g]];
IF v.verdict # yes THEN RETURN [[no, aw.culprit]];
RETURN [[yes, NIL]]}}
ENDCASE => RETURN [[ofCourseNot, f]]};

Is: PUBLIC PROC [f: Se, op: REF] RETURNS [r: VerdictAndCulprit] =

{WITH Car[f] SELECT FROM
atom: ATOM =>
WITH op SELECT FROM
lst: LIST OF REF ANY =>
{FOR p: LIST OF REF ANY ← lst, p.rest WHILE p # NIL DO
IF p.first = atom THEN RETURN [[yes, NIL]]
ENDLOOP;
RETURN [[no, f]]};
atop =>
{IF atop = atom THEN RETURN [[yes, NIL]] ELSE RETURN [[no, f]]};
ENDCASE => ERROR;
ENDCASE => RETURN [[ofCourseNot, f]]}

END
.