CedarScannerImpl.mesa
Russ Atkinson, July 20, 1983 11:23 am
Last Edited by: Teitelman, January 24, 1983 1:32 pm
DIRECTORY
Ascii USING [BS, CR, FF, LF, NUL, TAB],
Atom USING [MakeAtom],
CedarScanner USING [GetClosure, GetProc, Token, TokenKind],
Real USING [ReadReal],
Rope USING [Fetch, FromProc, ROPE, Size, Substr];
CedarScannerImpl:
CEDAR
PROGRAM
IMPORTS Atom, Real, Rope
EXPORTS CedarScanner
SHARES Rope
= BEGIN OPEN Ascii, CedarScanner, Rope;
constants
ControlZ: CHAR = 32C; -- Bravo escape char
ControlCR: CHAR = 215C; -- control CR (Bravo relic)
LastIntDiv10: INT = LAST[INT]/10;
LastCardDiv8: INT = LAST[LONG CARDINAL]/8;
GetToken:
PUBLIC
PROC [get: GetClosure,index:
INT]
RETURNS [token: Token] = {
FirstChar:
PROC [kind: TokenKind] = {
accept the current char, set the kind
and grab the next character
token.kind ← kind;
token.start ← index - 1;
token.next ← index;
char ← get.proc[get.data, index];
index ← index + 1;
};
AddChar:
PROC [] = {
just accept the current character
token.next ← index};
AddCharPlus:
PROC [] = {
accept the current character & get the next one
token.next ← index;
char ← get.proc[get.data, index];
index ← index + 1};
NextChar:
PROC = {
grab the next char, but don't add it to the token
char ← get.proc[get.data, index];
index ← index + 1};
PeekChar:
PROC = {
look at the next char, but don't add it to the token, and don't advance the index
char ← get.proc[get.data, index];
};
AcceptEscapeCode:
PROC
RETURNS [
BOOL] = {
assume that the last char accepted was '\\
and that char is the 1st char in the escape code
SELECT char
FROM
'n, 'N, 'r, 'R, 't, 'T, 'b, 'B,
'f, 'F, 'l, 'L, '', '", '\\ => {
AddCharPlus[];
RETURN [TRUE]};
IN ['0..'3] => {
AddCharPlus[];
IF char
IN ['0..'7]
THEN {
AddCharPlus[];
IF char
IN ['0..'7]
THEN {
AddCharPlus[];
RETURN [TRUE]}}};
ENDCASE;
token.msg ← "invalid escape code";
RETURN [FALSE];
};
char: CHAR ← get.proc[get.data, index];
index ← index + 1;
token.msg ← NIL;
DO
-- at start of token (we think)
{
-- start block for exits
lag: CHAR ← char;
SELECT char
FROM
ControlZ => {
DO
-- skip over the Bravo garbage
NextChar[];
SELECT char
FROM
NUL => IF lag = NUL THEN GO TO EndOfFile;
CR, ControlCR => EXIT;
ENDCASE;
lag ← char;
ENDLOOP;
};
IN [
NUL..' ], ControlCR => {
NextChar[];
IF char = NUL AND lag = NUL THEN GO TO EndOfFile};
IN ['a..'z],
IN ['A..'Z] => {
FirstChar[tokenID];
DO
SELECT char
FROM
IN ['a..'z],
IN ['A..'Z],
IN ['0..'9] => {
AddCharPlus[];
LOOP};
ENDCASE => RETURN;
ENDLOOP;
};
'0, '1, '2, '3, '4, '5, '6, '7, '8, '9 => {
accept the first run of digits
card: CARDINAL ← char - '0;
octalValid: BOOL ← card < 8;
small: BOOL ← octalValid;
hexLast: INT ← -1;
FirstChar[tokenINT];
WHILE char
IN ['0..'9]
DO
octalValid ← octalValid AND char IN ['0..'7];
card ← card * 8 + (char-'0);
small ← small AND octalValid AND card < 400B;
AddCharPlus[];
ENDLOOP;
very special scan to determine if this is hex
hexLast ← index-1;
DO
tc: CHAR ← get.proc[get.data, hexLast];
SELECT tc
FROM
IN ['0..'9],
IN ['A..'F],
IN ['a..'f] => {
hexLast ← hexLast + 1; LOOP};
'X, 'x, 'H, 'h => {
a hex constant!
token.next ← hexLast + 1;
RETURN;
};
ENDCASE => EXIT;
ENDLOOP;
IF char = '.
THEN {
determine whether there is a fraction
PeekChar[]; -- look at the next char, but don't eat it
IF char = '. THEN RETURN; -- no fraction, will be a ".." token
token.kind ← tokenREAL;
AddChar[]; -- accept the original '.
WHILE char
IN ['0..'9]
DO
AddCharPlus[];
ENDLOOP};
SELECT char
FROM
'e, 'E => {
accept the exponent
AddCharPlus[];
IF char = '- OR char = '+ THEN AddCharPlus[];
WHILE char
IN ['0..'9]
DO
AddCharPlus[];
ENDLOOP;
token.kind ← tokenREAL;
};
'b, 'B => {
IF token.kind = tokenREAL THEN RETURN;
AddCharPlus[];
IF
NOT octalValid
THEN {
token.msg ← "invalid octal constant";
GO TO Error};
};
'c, 'C => {
IF token.kind = tokenREAL THEN RETURN;
AddCharPlus[];
IF
NOT small
OR card > 377B
THEN
{token.msg ← "invalid character code"; GO TO Error};
token.kind ← tokenCHAR;
};
ENDCASE;
RETURN;
};
',, ';, ':, '←, '#, '~, '+, '*, '/, '^, '@, '!, '(, '), '[, '], '{, '} => {
FirstChar[tokenSINGLE];
RETURN;
};
'' => {
FirstChar[tokenCHAR];
lag ← char;
AddCharPlus[];
IF lag =
NUL
AND char =
NUL
THEN
{token.msg ← "end-of-file in char literal";
GOTO Error;
};
IF lag # '\\ THEN RETURN;
IF AcceptEscapeCode[] THEN RETURN;
GO TO Error;
};
'" => {
FirstChar[tokenROPE];
DO
-- eat up the string/rope literal
SELECT char
FROM
'" => {
AddCharPlus[];
IF char # '" THEN EXIT};
'\\ => {
AddCharPlus[];
IF AcceptEscapeCode[] THEN LOOP;
GO TO Error};
NUL => {
AddCharPlus[];
IF char # NUL THEN LOOP;
token.msg ← "end-of-file in string literal";
GO TO Error};
ENDCASE;
AddCharPlus[];
ENDLOOP;
accept trailing L (for local frame designation)
IF char = 'L OR char = 'l THEN AddCharPlus[];
RETURN;
};
'$ => {
FirstChar[tokenATOM];
SELECT char
FROM
IN ['a..'z], IN ['A..'Z] => {};
ENDCASE =>
{token.msg ← "invalid atom"; GO TO Error};
DO
-- accumulate rest of atom name
SELECT char
FROM
IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {};
ENDCASE => EXIT;
AddCharPlus[]
ENDLOOP;
RETURN};
'- => {
tokenMINUS or comment processing
FirstChar[tokenSINGLE];
IF char # '- THEN RETURN;
token.kind ← tokenCOMMENT;
lag ← CR;
now we have started a comment
DO
AddCharPlus[];
SELECT char
FROM
CR, ControlCR => GO TO LastOne;
'- => IF lag = '- THEN GO TO LastOne;
NUL =>
IF lag =
NUL
THEN
{token.next ← index - 2;
RETURN};
ENDCASE;
lag ← char;
ENDLOOP;
};
'. => {
either a REAL or a dot or a dotdot
FirstChar[tokenSINGLE];
IF char = '. THEN GO TO Double;
IF char NOT IN ['0..'9] THEN RETURN;
token.kind ← tokenREAL;
AddCharPlus[];
WHILE char
IN ['0..'9]
DO
AddCharPlus[];
ENDLOOP;
IF char = 'E
OR char = 'e
THEN {
accept the exponent
AddCharPlus[];
IF char = '- OR char = '+ THEN AddCharPlus[];
WHILE char
IN ['0..'9]
DO
AddCharPlus[];
ENDLOOP};
RETURN;
};
'= => {
either '=' or '=>'
FirstChar[tokenSINGLE];
IF char = '>
THEN GO TO Double
ELSE RETURN;
};
'>, '< => {
either '>' or '>=' (or '<' or '<=')
FirstChar[tokenSINGLE];
IF char = '=
THEN GO TO Double
ELSE RETURN;
};
ENDCASE => {
FirstChar[tokenERROR];
token.msg ← "invalid character";
RETURN;
};
EXITS
Error => {
token.kind ← tokenERROR;
RETURN};
Double => {
token.kind ← tokenDOUBLE;
token.next ← index;
RETURN};
LastOne => {
token.next ← index;
RETURN};
EndOfFile => {
token.start ← token.next ← index - 2;
token.kind ← tokenEOF;
RETURN};
ENDLOOP;
};
RealFromToken:
PUBLIC
PROC [get: GetClosure, token: Token]
RETURNS [
REAL] =
TRUSTED {
takes a Token into a REAL (parses the literal)
signals WrongKind if token.kind # tokenREAL
index: INT ← token.start;
gp:
PROC
RETURNS [c:
CHAR] =
TRUSTED {
c ← get.proc[get.data, index];
index ← index + 1;
};
IF token.kind # tokenREAL THEN ERROR WrongKind;
RETURN [Real.ReadReal[gp]];
};
IntFromToken:
PUBLIC
PROC [get: GetClosure, token: Token]
RETURNS [x:
INT ← 0] = {
takes a Token into a INT (parses the literal)
signals WrongKind if token.kind # tokenINT
base: NAT ← 10;
lc: CHAR ← 0C;
end: INT ← token.next - 1;
over: INT ← LastIntDiv10;
IF token.kind # tokenINT THEN ERROR WrongKind;
lc ← get.proc[get.data, end];
SELECT lc
FROM
'H, 'h, 'X, 'x => {
FOR i:
INT
IN [token.start..end-1]
DO
c: CHAR ← get.proc[get.data, i];
SELECT c
FROM
IN ['0..'9] => x ← x * 16 + (get.proc[get.data, i] - '0);
IN ['A..'F] => x ← x * 16 + (get.proc[get.data, i] - 'A) + 10;
IN ['a..'f] => x ← x * 16 + (get.proc[get.data, i] - 'a) + 10;
ENDCASE;
ENDLOOP;
RETURN};
'B, 'b => {
base ← 8;
end ← end - 1;
over ← LastCardDiv8};
ENDCASE;
FOR i:
INT
IN [token.start..end]
DO
IF x > over THEN ERROR IntegerOverflow;
x ← x * base + (get.proc[get.data, i] - '0);
IF x < 0 AND base = 10 THEN ERROR IntegerOverflow;
ENDLOOP;
};
CharFromToken:
PUBLIC
PROC [get: GetClosure, token: Token]
RETURNS [c:
CHAR] = {
takes a Token into a CHAR (parses the literal)
signals WrongKind if token.kind # tokenCHAR
x: CARDINAL ← 0;
IF token.kind # tokenCHAR THEN ERROR WrongKind;
IF get.proc[get.data, token.start] = ''
THEN
IF (c ← get.proc[get.data, token.start+1]) = '\\
THEN RETURN [ParseEscapeCode[get, token.start+2].c]
ELSE RETURN;
FOR i:
INT
IN [token.start..token.next - 2]
DO
tc: CHAR ← get.proc[get.data, i];
x ← x * 8 + (tc - '0);
ENDLOOP;
c ← x + 0C;
};
SingleFromToken:
PUBLIC
PROC [get: GetClosure, token: Token]
RETURNS [c:
CHAR] = {
takes a Token into a CHAR
signals WrongKind if token.kind # tokenSINGLE
IF token.kind # tokenSINGLE THEN ERROR WrongKind;
c ← get.proc[get.data, token.start];
};
RopeFromToken:
PUBLIC
PROC [get: GetClosure, token: Token]
RETURNS [new:
ROPE] = {
takes a Token into a ROPE (parses the literal)
signals WrongKind if token.kind # tokenROPE
index: INT ← token.start + 1;
end: INT ← token.next - 1;
escaped: BOOL ← FALSE;
res: INT ← 0;
gp:
PROC
RETURNS [c:
CHAR] = {
IF index > end THEN RETURN [0C];
c ← get.proc[get.data, index];
index ← index + 1;
res ← res + 1;
SELECT c
FROM
'\\ => {[c, index] ← ParseEscapeCode[get, index]; escaped ← TRUE};
'" => {index ← index + 1; escaped ← TRUE};
ENDCASE;
};
IF token.kind # tokenROPE THEN ERROR WrongKind;
SELECT get.proc[get.data, end]
FROM
'l, 'L => end ← end - 2;
'" => end ← end - 1;
ENDCASE;
new ← Rope.FromProc[end - index + 1, gp];
IF escaped THEN new ← new.Substr[0, res];
};
AtomFromToken:
PUBLIC
PROC [get: GetClosure, token: Token]
RETURNS [
ATOM] = {
takes a Token into a ATOM (parses the literal)
signals WrongKind if token.kind # tokenATOM
IF token.kind # tokenATOM THEN ERROR WrongKind;
RETURN[Atom.MakeAtom[ExtractRope[get, token.start+1, token.next]]];
};
ContentsFromToken:
PUBLIC
PROC [get: GetClosure, token: Token]
RETURNS [
ROPE] = {
gets the contents of the token as a ROPE
can be used on any token
RETURN [ExtractRope[get, token.start, token.next]];
};
WrongKind: PUBLIC ERROR = CODE;
IntegerOverflow:
PUBLIC
ERROR =
CODE;
utility routines
ExtractRope:
PROC [get: GetClosure, start,end:
INT]
RETURNS [
ROPE] = {
size: INT ← end - start;
gp:
PROC
RETURNS [c:
CHAR] = {
c ← get.proc[get.data, start];
start ← start + 1};
RETURN[Rope.FromProc[size, gp]];
};
ParseEscapeCode:
PROC [get: GetClosure, index:
INT]
RETURNS [c: CHAR, next: INT] = {
c ← get.proc[get.data, index];
next ← index + 1;
SELECT c
FROM
'n, 'N => c ← Ascii.CR;
'r, 'R => c ← Ascii.CR;
't, 'T => c ← Ascii.TAB;
'b, 'B => c ← Ascii.BS;
'f, 'F => c ← Ascii.FF;
'l, 'L => c ← Ascii.LF;
IN ['0..'3] => {
cc: CHAR;
c ← 0C + (c-'0);
cc ← get.proc[get.data, next];
IF cc
IN ['0..'7]
THEN {
c ← 0C + ((cc-'0) + (c-0C)*10B);
cc ← get.proc[get.data, next ← next + 1];
IF cc
IN ['0..'7]
THEN {
c ← 0C + ((cc-'0) + (c-0C)*10B);
next ← next + 1}}};
ENDCASE;
};
test facilities
TestResults:
TYPE =
RECORD [token: Token, contents:
ROPE, literal:
REF];
get: GetProc = {
rope: ROPE = NARROW[data];
IF index < rope.Size[] THEN RETURN [rope.Fetch[index]];
RETURN [NUL];
};
getClosure: GetClosure ← [get];
tget: GetProc = {
c: CHAR ← get[getClosure.data, index];
IF c = '& THEN c ← 'A;
RETURN [c];
};
tgetClosure: GetClosure ← [tget];
Test:
PROC [rope:
ROPE]
RETURNS [list:
LIST
OF TestResults] = {
index: INT ← 0;
lag, temp: LIST OF TestResults ← NIL;
getClosure.data ← rope;
tgetClosure.data ← rope;
list ← NIL;
DO
token: Token ← GetToken[tgetClosure, index];
literal: REF ← NIL;
IF token.kind = tokenEOF THEN EXIT;
index ← token.next;
SELECT token.kind
FROM
tokenINT => literal ← NEW[INT ← IntFromToken[getClosure, token]];
tokenREAL => literal ← NEW[REAL ← RealFromToken[getClosure, token]];
tokenCHAR => literal ← NEW[CHAR ← CharFromToken[getClosure, token]];
tokenROPE => literal ← RopeFromToken[getClosure, token];
tokenATOM => literal ← AtomFromToken[getClosure, token];
ENDCASE;
temp ← LIST[[token, ContentsFromToken[getClosure, token], literal]];
IF lag = NIL THEN list ← temp ELSE lag.rest ← temp;
lag ← temp;
ENDLOOP;
};