-- file MesaScannerImpl.mesa
-- last edit by Russ Atkinson, February 9, 1982 6:29 pm
DIRECTORY
Ascii: TYPE USING
[BS, CR, FF, LF, NUL, TAB],
Atom: TYPE USING
[MakeAtom],
MesaScanner: TYPE USING
[GetProcType, Token, TokenKind],
Real: TYPE
USING [ReadReal],
Rope: TYPE
USING [Fetch, FromProc, ROPE, Size, Substr];
MesaScannerImpl: PROGRAM
IMPORTS Atom, Real, Rope
EXPORTS MesaScanner
SHARES Rope
= BEGIN OPEN Ascii, MesaScanner, Rope;
ControlZ: CHAR = 32C; -- Bravo escape char
ControlCR: CHAR = 215C; -- control CR (Bravo relic)
LastIntDiv10: INT = LAST[INT]/10;
LastCardDiv8: INT = LAST[LONG CARDINAL]/8;
GetToken: PUBLIC PROC
[get: GetProcType,index: INT]
RETURNS [token: Token] = {
FirstChar: PROC [kind: TokenKind] = {
-- accept the current char, set the kind
-- and grab the next character
token.kind ← kind;
token.start ← index - 1;
token.next ← index;
char ← get[index];
index ← index + 1;
};
AddChar: PROC [] = {
-- just accept the current character
token.next ← index};
AddCharPlus: PROC [] = {
-- accept the current character & get the next one
token.next ← index;
char ← get[index];
index ← index + 1};
NextChar: PROC = {
-- grab the next char, but don't add it to the token
char ← get[index];
index ← index + 1};
AcceptEscapeCode: PROC RETURNS [BOOL] = {
-- assume that the last char accepted was '\\
-- and that char is the 1st char in the escape code
SELECT char FROM
'n, 'N, 'r, 'R, 't, 'T, 'b, 'B,
'f, 'F, 'l, 'L, '', '", '\\ =>
{AddCharPlus[]; RETURN [TRUE]};
IN ['0..'3] =>
{AddCharPlus[];
IF char IN ['0..'7] THEN
{AddCharPlus[];
IF char IN ['0..'7] THEN
{AddCharPlus[];
RETURN [TRUE]}}};
ENDCASE;
token.msg ← "invalid escape code";
RETURN [FALSE];
};
char: CHAR ← get[index];
index ← index + 1;
token.msg ← NIL;
DO -- at start of token (we think)
{lag: CHAR ← char;
SELECT char FROM
ControlZ =>
{DO -- skip over the Bravo garbage
NextChar[];
SELECT char FROM
NUL => IF lag = NUL THEN GO TO EndOfFile;
CR, ControlCR => EXIT;
ENDCASE;
lag ← char;
ENDLOOP;
};
IN [NUL..' ], ControlCR =>
{NextChar[];
IF char = NUL AND lag = NUL THEN GO TO EndOfFile};
IN ['a..'z], IN ['A..'Z] =>
{FirstChar[tokenID];
DO
SELECT char FROM
IN ['a..'z], IN ['A..'Z], IN ['0..'9] =>
{AddCharPlus[]; LOOP};
ENDCASE => RETURN;
ENDLOOP;
};
'0, '1, '2, '3, '4, '5, '6, '7, '8, '9 =>
{-- accept the first run of digits
card: CARDINAL ← char - '0;
octalValid: BOOL ← card < 8;
small: BOOL ← octalValid;
FirstChar[tokenINT];
WHILE char IN ['0..'9] DO
octalValid ← octalValid AND char IN ['0..'7];
card ← card * 8 + (char-'0);
small ← small AND octalValid AND card < 400B;
AddCharPlus[];
ENDLOOP;
IF char = '. THEN
{-- determine whether there is a fraction
NextChar[];
IF char = '. THEN RETURN; -- no fraction
token.kind ← tokenREAL;
AddChar[];
WHILE char IN ['0..'9] DO
AddCharPlus[];
ENDLOOP};
SELECT char FROM
'e, 'E =>
{-- accept the exponent
AddCharPlus[];
IF char = '- OR char = '+ THEN AddCharPlus[];
WHILE char IN ['0..'9] DO
AddCharPlus[];
ENDLOOP;
token.kind ← tokenREAL};
'b, 'B =>
{IF token.kind = tokenREAL THEN RETURN;
AddCharPlus[];
IF NOT octalValid THEN
{token.msg ← "invalid octal constant"; GO TO Error}};
'c, 'C =>
{IF token.kind = tokenREAL THEN RETURN;
AddCharPlus[];
IF NOT small THEN
{token.msg ← "invalid character code"; GO TO Error};
token.kind ← tokenCHAR};
ENDCASE;
RETURN};
',, ';, ':, '←, '#, '~, '+, '*, '/, '↑, '@, '!, '(, '), '[, '], '{, '} =>
{FirstChar[tokenSINGLE]; RETURN};
'' =>
{FirstChar[tokenCHAR];
lag ← char;
AddCharPlus[];
IF lag # '\\ THEN RETURN;
IF AcceptEscapeCode[] THEN RETURN;
GO TO Error;
};
'" =>
{FirstChar[tokenROPE];
DO -- eat up the string/rope literal
SELECT char FROM
'" =>
{AddCharPlus[];
IF char # '" THEN EXIT};
'\\ =>
{AddCharPlus[];
IF AcceptEscapeCode[] THEN LOOP;
GO TO Error};
NUL =>
{AddCharPlus[];
IF char # NUL THEN LOOP;
token.msg ← "end-of-file in string literal";
GO TO Error};
ENDCASE;
AddCharPlus[];
ENDLOOP;
-- accept trailing L (for local frame designation)
IF char = 'L OR char = 'l THEN AddCharPlus[];
RETURN;
};
'$ =>
{FirstChar[tokenATOM];
SELECT char FROM
IN ['a..'z], IN ['A..'Z] => {};
ENDCASE =>
{token.msg ← "invalid atom"; GO TO Error};
DO -- accumulate rest of atom name
SELECT char FROM
IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {};
ENDCASE => EXIT;
AddCharPlus[]
ENDLOOP;
RETURN};
'- => {-- tokenMINUS or comment processing
FirstChar[tokenSINGLE];
IF char # '- THEN RETURN;
token.kind ← tokenCOMMENT;
lag ← CR;
-- now we have started a comment
DO
AddCharPlus[];
SELECT char FROM
CR, ControlCR => GO TO LastOne;
'- => IF lag = '- THEN GO TO LastOne;
NUL => IF lag = NUL THEN
{token.next ← index - 2;
RETURN};
ENDCASE;
lag ← char;
ENDLOOP;
};
'. =>
{-- either a REAL or a dot or a dotdot
FirstChar[tokenSINGLE];
IF char = '. THEN GO TO Double;
IF char NOT IN ['0..'9] THEN RETURN;
token.kind ← tokenREAL;
AddCharPlus[];
WHILE char IN ['0..'9] DO
AddCharPlus[];
ENDLOOP;
IF char = 'E OR char = 'e THEN
{-- accept the exponent
AddCharPlus[];
IF char = '- OR char = '+ THEN AddCharPlus[];
WHILE char IN ['0..'9] DO
AddCharPlus[];
ENDLOOP};
RETURN;
};
'= =>
{-- either '=' or '=>'
FirstChar[tokenSINGLE];
IF char = '>
THEN GO TO Double
ELSE RETURN};
'>, '< =>
{-- either '>' or '>=' (or '<' or '<=')
FirstChar[tokenSINGLE];
IF char = '=
THEN GO TO Double
ELSE RETURN};
ENDCASE =>
{FirstChar[tokenERROR];
token.msg ← "invalid character";
RETURN};
EXITS
Error =>
{token.kind ← tokenERROR;
RETURN};
Double =>
{token.kind ← tokenDOUBLE;
token.next ← index;
RETURN};
LastOne =>
{token.next ← index;
RETURN};
EndOfFile =>
{token.start ← token.next ← index - 2;
token.kind ← tokenEOF;
RETURN}};
ENDLOOP;
};
RealFromToken: PUBLIC PROC
[get: GetProcType, token: Token] RETURNS [REAL] = {
-- takes a Token into a REAL (parses the literal)
-- signals WrongKind if token.kind # tokenREAL
index: INT ← token.start;
gp: PROC RETURNS [c: CHAR] = {
c ← get[index];
index ← index + 1;
};
IF token.kind # tokenREAL THEN ERROR WrongKind;
RETURN [Real.ReadReal[gp]];
};
IntFromToken: PUBLIC PROC
[get: GetProcType, token: Token] RETURNS [x: INT] = {
-- takes a Token into a INT (parses the literal)
-- signals WrongKind if token.kind # tokenINT
base: NAT ← 10;
lc: CHAR ← 0C;
end: INT ← token.next - 1;
over: INT ← LastIntDiv10;
IF token.kind # tokenINT THEN ERROR WrongKind;
lc ← get[end];
x ← 0;
IF lc = 'b OR lc = 'B THEN
{base ← 8; end ← end - 1; over ← LastCardDiv8};
FOR i: INT IN [token.start..end] DO
IF x > over THEN ERROR IntegerOverflow;
x ← x * base + (get[i] - '0);
IF x < 0 AND base = 10 THEN ERROR IntegerOverflow;
ENDLOOP;
};
CharFromToken: PUBLIC PROC
[get: GetProcType, token: Token] RETURNS [c: CHAR] = {
-- takes a Token into a CHAR (parses the literal)
-- signals WrongKind if token.kind # tokenCHAR
IF token.kind # tokenCHAR THEN ERROR WrongKind;
IF get[token.start] = '' THEN
IF (c ← get[token.start+1]) = '\\
THEN RETURN [ParseEscapeCode[get, token.start+2].c]
ELSE RETURN;
RETURN [ParseEscapeCode[get, token.start].c];
};
RopeFromToken: PUBLIC PROC
[get: GetProcType, token: Token] RETURNS [new: ROPE] = {
-- takes a Token into a ROPE (parses the literal)
-- signals WrongKind if token.kind # tokenROPE
index: INT ← token.start + 1;
end: INT ← token.next - 1;
escaped: BOOL ← FALSE;
res: INT ← 0;
gp: PROC RETURNS [c: CHAR] = {
IF index >= end THEN RETURN [0C];
c ← get[index];
index ← index + 1;
res ← res + 1;
IF c = '" THEN {index ← index + 1; escaped ← TRUE};
IF c # '\\ THEN RETURN;
[c, index] ← ParseEscapeCode[get, index+1];
escaped ← TRUE};
IF token.kind # tokenROPE THEN ERROR WrongKind;
IF get[end] = '" THEN end ← end - 1;
new ← Rope.FromProc[end - index, gp];
IF escaped THEN new ← new.Substr[0, res];
};
AtomFromToken: PUBLIC PROC
[get: GetProcType, token: Token] RETURNS [ATOM] = {
-- takes a Token into a ATOM (parses the literal)
-- signals WrongKind if token.kind # tokenATOM
IF token.kind # tokenATOM THEN ERROR WrongKind;
RETURN[Atom.MakeAtom[ExtractRope[get, token.start+1, token.next]]];
};
ContentsFromToken: PUBLIC PROC
[get: GetProcType, token: Token] RETURNS [ROPE] = {
-- gets the contents of the token as a ROPE
-- can be used on any token
RETURN [ExtractRope[get, token.start, token.next]];
};
WrongKind: PUBLIC ERROR = CODE;
IntegerOverflow: PUBLIC ERROR = CODE;
-- utility routines
ExtractRope: PROC
[get: GetProcType, start,end: INT] RETURNS [ROPE] = {
size: INT ← end - start;
gp: PROC RETURNS [c: CHAR] = {
c ← get[start];
start ← start + 1};
RETURN[Rope.FromProc[size, gp]];
};
ParseEscapeCode: PROC
[get: GetProcType, index: INT]
RETURNS [c: CHAR, next: INT] = {
c ← get[index];
next ← index + 1;
SELECT c FROM
'n, 'N => c ← Ascii.CR;
'r, 'R => c ← Ascii.CR;
't, 'T => c ← Ascii.TAB;
'b, 'B => c ← Ascii.BS;
'f, 'F => c ← Ascii.FF;
'l, 'L => c ← Ascii.LF;
IN ['0..'3] =>
{cc: CHAR;
c ← 0C + (c-'0);
cc ← get[next];
IF cc IN ['0..'7] THEN
{c ← 0C + ((cc-'0) + (c-0C)*10B);
cc ← get[next ← next + 1];
IF cc IN ['0..'7] THEN
{c ← 0C + ((cc-'0) + (c-0C)*10B);
next ← next + 1}}};
ENDCASE;
};
-- test facilities
TestResults: TYPE = RECORD
[token: Token,
contents: ROPE,
literal: REF];
Test: PROC [rope: ROPE] RETURNS [list: LIST OF TestResults] = {
get: GetProcType = {
IF index < rope.Size[] THEN RETURN [rope.Fetch[index]];
RETURN [NUL];
};
tget: GetProcType = {
c: CHAR ← get[index];
IF c = '& THEN c ← 'A;
RETURN [c];
};
index: INT ← 0;
lag, temp: LIST OF TestResults ← NIL;
list ← NIL;
DO
token: Token ← GetToken[tget, index];
literal: REF ← NIL;
IF token.kind = tokenEOF THEN EXIT;
index ← token.next;
SELECT token.kind FROM
tokenINT => literal ← NEW[INT ← IntFromToken[get, token]];
tokenREAL => literal ← NEW[REAL ← RealFromToken[get, token]];
tokenCHAR => literal ← NEW[CHAR ← CharFromToken[get, token]];
tokenROPE => literal ← RopeFromToken[get, token];
tokenATOM => literal ← AtomFromToken[get, token];
ENDCASE;
temp ← LIST[[token, ContentsFromToken[get, token], literal]];
IF lag = NIL THEN list ← temp ELSE lag.rest ← temp;
lag ← temp;
ENDLOOP;
};
END.