CedarScannerImpl.mesa
Russ Atkinson, November 15, 1982 1:03 pm
Last Edited by: Teitelman, January 24, 1983 1:32 pm
DIRECTORY
Ascii USING [BS, CR, FF, LF, NUL, TAB],
Atom USING [MakeAtom],
CedarScanner USING [GetClosure, GetProc, Token, TokenKind],
Real USING [ReadReal],
Rope USING [Fetch, FromProc, ROPE, Size, Substr];
CedarScannerImpl: CEDAR PROGRAM
IMPORTS Atom, Real, Rope
EXPORTS CedarScanner
SHARES Rope
= BEGIN OPEN Ascii, CedarScanner, Rope;
ControlZ: CHAR = 32C; -- Bravo escape char
ControlCR: CHAR = 215C; -- control CR (Bravo relic)
LastIntDiv10: INT = LAST[INT]/10;
LastCardDiv8: INT = LAST[LONG CARDINAL]/8;
GetToken: PUBLIC PROC [get: GetClosure,index: INT] RETURNS [token: Token] = {
FirstChar: PROC [kind: TokenKind] = {
accept the current char, set the kind
and grab the next character
token.kind ← kind;
token.start ← index - 1;
token.next ← index;
char ← get.proc[get.data, index];
index ← index + 1;
};
AddChar: PROC [] = {
just accept the current character
token.next ← index};
AddCharPlus: PROC [] = {
accept the current character & get the next one
token.next ← index;
char ← get.proc[get.data, index];
index ← index + 1};
NextChar: PROC = {
grab the next char, but don't add it to the token
char ← get.proc[get.data, index];
index ← index + 1};
AcceptEscapeCode: PROC RETURNS [BOOL] = {
assume that the last char accepted was '\\
and that char is the 1st char in the escape code
SELECT char FROM
'n, 'N, 'r, 'R, 't, 'T, 'b, 'B,
'f, 'F, 'l, 'L, '', '", '\\ =>
{AddCharPlus[]; RETURN [TRUE]};
IN ['0..'3] =>
{AddCharPlus[];
IF char IN ['0..'7] THEN
{AddCharPlus[];
IF char IN ['0..'7] THEN
{AddCharPlus[];
RETURN [TRUE]}}};
ENDCASE;
token.msg ← "invalid escape code";
RETURN [FALSE];
};
char: CHAR ← get.proc[get.data, index];
index ← index + 1;
token.msg ← NIL;
DO -- at start of token (we think)
{lag: CHAR ← char;
SELECT char FROM
ControlZ =>
{DO -- skip over the Bravo garbage
NextChar[];
SELECT char FROM
NUL => IF lag = NUL THEN GO TO EndOfFile;
CR, ControlCR => EXIT;
ENDCASE;
lag ← char;
ENDLOOP;
};
IN [NUL..' ], ControlCR =>
{NextChar[];
IF char = NUL AND lag = NUL THEN GO TO EndOfFile};
IN ['a..'z], IN ['A..'Z] =>
{FirstChar[tokenID];
DO
SELECT char FROM
IN ['a..'z], IN ['A..'Z], IN ['0..'9] =>
{AddCharPlus[]; LOOP};
ENDCASE => RETURN;
ENDLOOP;
};
'0, '1, '2, '3, '4, '5, '6, '7, '8, '9 =>
{-- accept the first run of digits
card: CARDINAL ← char - '0;
octalValid: BOOL ← card < 8;
small: BOOL ← octalValid;
hexLast: INT ← -1;
FirstChar[tokenINT];
WHILE char IN ['0..'9] DO
octalValid ← octalValid AND char IN ['0..'7];
card ← card * 8 + (char-'0);
small ← small AND octalValid AND card < 400B;
AddCharPlus[];
ENDLOOP;
very special scan to determine if this is hex
hexLast ← index-1;
DO
tc: CHAR ← get.proc[get.data, hexLast];
SELECT tc FROM
IN ['0..'9], IN ['A..'F], IN ['a..'f] =>
{hexLast ← hexLast + 1; LOOP};
'X, 'x, 'H, 'h =>
{-- a hex constant!
token.next ← hexLast + 1;
RETURN;
};
ENDCASE => EXIT;
ENDLOOP;
IF char = '. THEN
{-- determine whether there is a fraction
NextChar[];
IF char = '. THEN RETURN; -- no fraction
token.kind ← tokenREAL;
AddChar[];
WHILE char IN ['0..'9] DO
AddCharPlus[];
ENDLOOP};
SELECT char FROM
'e, 'E =>
{-- accept the exponent
AddCharPlus[];
IF char = '- OR char = '+ THEN AddCharPlus[];
WHILE char IN ['0..'9] DO
AddCharPlus[];
ENDLOOP;
token.kind ← tokenREAL};
'b, 'B =>
{IF token.kind = tokenREAL THEN RETURN;
AddCharPlus[];
IF NOT octalValid THEN
{token.msg ← "invalid octal constant"; GO TO Error}};
'c, 'C =>
{IF token.kind = tokenREAL THEN RETURN;
AddCharPlus[];
IF NOT small OR card > 377B THEN
{token.msg ← "invalid character code"; GO TO Error};
token.kind ← tokenCHAR};
ENDCASE;
RETURN};
',, ';, ':, '←, '#, '~, '+, '*, '/, '^, '@, '!, '(, '), '[, '], '{, '} =>
{FirstChar[tokenSINGLE]; RETURN};
'' =>
{FirstChar[tokenCHAR];
lag ← char;
AddCharPlus[];
IF lag = NUL AND char = NUL THEN
{token.msg ← "end-of-file in char literal";
GOTO Error;
};
IF lag # '\\ THEN RETURN;
IF AcceptEscapeCode[] THEN RETURN;
GO TO Error;
};
'" =>
{FirstChar[tokenROPE];
DO -- eat up the string/rope literal
SELECT char FROM
'" =>
{AddCharPlus[];
IF char # '" THEN EXIT};
'\\ =>
{AddCharPlus[];
IF AcceptEscapeCode[] THEN LOOP;
GO TO Error};
NUL =>
{AddCharPlus[];
IF char # NUL THEN LOOP;
token.msg ← "end-of-file in string literal";
GO TO Error};
ENDCASE;
AddCharPlus[];
ENDLOOP;
accept trailing L (for local frame designation)
IF char = 'L OR char = 'l THEN AddCharPlus[];
RETURN;
};
'$ =>
{FirstChar[tokenATOM];
SELECT char FROM
IN ['a..'z], IN ['A..'Z] => {};
ENDCASE =>
{token.msg ← "invalid atom"; GO TO Error};
DO -- accumulate rest of atom name
SELECT char FROM
IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {};
ENDCASE => EXIT;
AddCharPlus[]
ENDLOOP;
RETURN};
'- => {-- tokenMINUS or comment processing
FirstChar[tokenSINGLE];
IF char # '- THEN RETURN;
token.kind ← tokenCOMMENT;
lag ← CR;
now we have started a comment
DO
AddCharPlus[];
SELECT char FROM
CR, ControlCR => GO TO LastOne;
'- => IF lag = '- THEN GO TO LastOne;
NUL => IF lag = NUL THEN
{token.next ← index - 2;
RETURN};
ENDCASE;
lag ← char;
ENDLOOP;
};
'. =>
{-- either a REAL or a dot or a dotdot
FirstChar[tokenSINGLE];
IF char = '. THEN GO TO Double;
IF char NOT IN ['0..'9] THEN RETURN;
token.kind ← tokenREAL;
AddCharPlus[];
WHILE char IN ['0..'9] DO
AddCharPlus[];
ENDLOOP;
IF char = 'E OR char = 'e THEN
{-- accept the exponent
AddCharPlus[];
IF char = '- OR char = '+ THEN AddCharPlus[];
WHILE char IN ['0..'9] DO
AddCharPlus[];
ENDLOOP};
RETURN;
};
'= =>
{-- either '=' or '=>'
FirstChar[tokenSINGLE];
IF char = '>
THEN GO TO Double
ELSE RETURN};
'>, '< =>
{-- either '>' or '>=' (or '<' or '<=')
FirstChar[tokenSINGLE];
IF char = '=
THEN GO TO Double
ELSE RETURN};
ENDCASE =>
{FirstChar[tokenERROR];
token.msg ← "invalid character";
RETURN};
EXITS
Error =>
{token.kind ← tokenERROR;
RETURN};
Double =>
{token.kind ← tokenDOUBLE;
token.next ← index;
RETURN};
LastOne =>
{token.next ← index;
RETURN};
EndOfFile =>
{token.start ← token.next ← index - 2;
token.kind ← tokenEOF;
RETURN}};
ENDLOOP;
};
RealFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [REAL] = TRUSTED {
takes a Token into a REAL (parses the literal)
signals WrongKind if token.kind # tokenREAL
index: INT ← token.start;
gp: PROC RETURNS [c: CHAR] = TRUSTED {
c ← get.proc[get.data, index];
index ← index + 1;
};
IF token.kind # tokenREAL THEN ERROR WrongKind;
RETURN [Real.ReadReal[gp]];
};
IntFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [x: INT ← 0] = {
takes a Token into a INT (parses the literal)
signals WrongKind if token.kind # tokenINT
base: NAT ← 10;
lc: CHAR ← 0C;
end: INT ← token.next - 1;
over: INT ← LastIntDiv10;
IF token.kind # tokenINT THEN ERROR WrongKind;
lc ← get.proc[get.data, end];
SELECT lc FROM
'H, 'h, 'X, 'x =>
{FOR i: INT IN [token.start..end-1] DO
c: CHAR ← get.proc[get.data, i];
SELECT c FROM
IN ['0..'9] => x ← x * 16 + (get.proc[get.data, i] - '0);
IN ['A..'F] => x ← x * 16 + (get.proc[get.data, i] - 'A) + 10;
IN ['a..'f] => x ← x * 16 + (get.proc[get.data, i] - 'a) + 10;
ENDCASE;
ENDLOOP;
RETURN};
'B, 'b =>
{base ← 8; end ← end - 1; over ← LastCardDiv8};
ENDCASE;
FOR i: INT IN [token.start..end] DO
IF x > over THEN ERROR IntegerOverflow;
x ← x * base + (get.proc[get.data, i] - '0);
IF x < 0 AND base = 10 THEN ERROR IntegerOverflow;
ENDLOOP;
};
CharFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [c: CHAR] = {
takes a Token into a CHAR (parses the literal)
signals WrongKind if token.kind # tokenCHAR
x: CARDINAL ← 0;
IF token.kind # tokenCHAR THEN ERROR WrongKind;
IF get.proc[get.data, token.start] = '' THEN
IF (c ← get.proc[get.data, token.start+1]) = '\\
THEN RETURN [ParseEscapeCode[get, token.start+2].c]
ELSE RETURN;
FOR i: INT IN [token.start..token.next - 2] DO
tc: CHAR ← get.proc[get.data, i];
x ← x * 8 + (tc - '0);
ENDLOOP;
c ← x + 0C;
};
SingleFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [c: CHAR] = {
takes a Token into a CHAR
signals WrongKind if token.kind # tokenSINGLE
IF token.kind # tokenSINGLE THEN ERROR WrongKind;
c ← get.proc[get.data, token.start];
};
RopeFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [new: ROPE] = {
takes a Token into a ROPE (parses the literal)
signals WrongKind if token.kind # tokenROPE
index: INT ← token.start + 1;
end: INT ← token.next - 1;
escaped: BOOLFALSE;
res: INT ← 0;
gp: PROC RETURNS [c: CHAR] = {
IF index > end THEN RETURN [0C];
c ← get.proc[get.data, index];
index ← index + 1;
res ← res + 1;
SELECT c FROM
'\\ => {[c, index] ← ParseEscapeCode[get, index]; escaped ← TRUE};
'" => {index ← index + 1; escaped ← TRUE};
ENDCASE;
};
IF token.kind # tokenROPE THEN ERROR WrongKind;
SELECT get.proc[get.data, end] FROM
'l, 'L => end ← end - 2;
'" => end ← end - 1;
ENDCASE;
new ← Rope.FromProc[end - index + 1, gp];
IF escaped THEN new ← new.Substr[0, res];
};
AtomFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [ATOM] = {
takes a Token into a ATOM (parses the literal)
signals WrongKind if token.kind # tokenATOM
IF token.kind # tokenATOM THEN ERROR WrongKind;
RETURN[Atom.MakeAtom[ExtractRope[get, token.start+1, token.next]]];
};
ContentsFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [ROPE] = {
gets the contents of the token as a ROPE
can be used on any token
RETURN [ExtractRope[get, token.start, token.next]];
};
WrongKind: PUBLIC ERROR = CODE;
IntegerOverflow: PUBLIC ERROR = CODE;
utility routines
ExtractRope: PROC [get: GetClosure, start,end: INT] RETURNS [ROPE] = {
size: INT ← end - start;
gp: PROC RETURNS [c: CHAR] = {
c ← get.proc[get.data, start];
start ← start + 1};
RETURN[Rope.FromProc[size, gp]];
};
ParseEscapeCode: PROC [get: GetClosure, index: INT]
RETURNS [c: CHAR, next: INT] = {
c ← get.proc[get.data, index];
next ← index + 1;
SELECT c FROM
'n, 'N => c ← Ascii.CR;
'r, 'R => c ← Ascii.CR;
't, 'T => c ← Ascii.TAB;
'b, 'B => c ← Ascii.BS;
'f, 'F => c ← Ascii.FF;
'l, 'L => c ← Ascii.LF;
IN ['0..'3] =>
{cc: CHAR;
c ← 0C + (c-'0);
cc ← get.proc[get.data, next];
IF cc IN ['0..'7] THEN
{c ← 0C + ((cc-'0) + (c-0C)*10B);
cc ← get.proc[get.data, next ← next + 1];
IF cc IN ['0..'7] THEN
{c ← 0C + ((cc-'0) + (c-0C)*10B);
next ← next + 1}}};
ENDCASE;
};
test facilities
TestResults: TYPE = RECORD [token: Token, contents: ROPE, literal: REF];
get: GetProc = {
rope: ROPE = NARROW[data];
IF index < rope.Size[] THEN RETURN [rope.Fetch[index]];
RETURN [NUL];
};
getClosure: GetClosure ← [get];
tget: GetProc = {
c: CHAR ← get[getClosure.data, index];
IF c = '& THEN c ← 'A;
RETURN [c];
};
tgetClosure: GetClosure ← [tget];
Test: PROC [rope: ROPE] RETURNS [list: LIST OF TestResults] = {
index: INT ← 0;
lag, temp: LIST OF TestResults ← NIL;
getClosure.data ← rope;
tgetClosure.data ← rope;
list ← NIL;
DO
token: Token ← GetToken[tgetClosure, index];
literal: REFNIL;
IF token.kind = tokenEOF THEN EXIT;
index ← token.next;
SELECT token.kind FROM
tokenINT => literal ← NEW[INT ← IntFromToken[getClosure, token]];
tokenREAL => literal ← NEW[REAL ← RealFromToken[getClosure, token]];
tokenCHAR => literal ← NEW[CHAR ← CharFromToken[getClosure, token]];
tokenROPE => literal ← RopeFromToken[getClosure, token];
tokenATOM => literal ← AtomFromToken[getClosure, token];
ENDCASE;
temp ← LIST[[token, ContentsFromToken[getClosure, token], literal]];
IF lag = NIL THEN list ← temp ELSE lag.rest ← temp;
lag ← temp;
ENDLOOP;
};
END.