-- file MesaScannerImpl.mesa -- last edit by Russ Atkinson, February 9, 1982 6:29 pm DIRECTORY Ascii: TYPE USING [BS, CR, FF, LF, NUL, TAB], Atom: TYPE USING [MakeAtom], MesaScanner: TYPE USING [GetProcType, Token, TokenKind], Real: TYPE USING [ReadReal], Rope: TYPE USING [Fetch, FromProc, ROPE, Size, Substr]; MesaScannerImpl: PROGRAM IMPORTS Atom, Real, Rope EXPORTS MesaScanner SHARES Rope = BEGIN OPEN Ascii, MesaScanner, Rope; ControlZ: CHAR = 32C; -- Bravo escape char ControlCR: CHAR = 215C; -- control CR (Bravo relic) LastIntDiv10: INT = LAST[INT]/10; LastCardDiv8: INT = LAST[LONG CARDINAL]/8; GetToken: PUBLIC PROC [get: GetProcType,index: INT] RETURNS [token: Token] = { FirstChar: PROC [kind: TokenKind] = { -- accept the current char, set the kind -- and grab the next character token.kind ← kind; token.start ← index - 1; token.next ← index; char ← get[index]; index ← index + 1; }; AddChar: PROC [] = { -- just accept the current character token.next ← index}; AddCharPlus: PROC [] = { -- accept the current character & get the next one token.next ← index; char ← get[index]; index ← index + 1}; NextChar: PROC = { -- grab the next char, but don't add it to the token char ← get[index]; index ← index + 1}; AcceptEscapeCode: PROC RETURNS [BOOL] = { -- assume that the last char accepted was '\\ -- and that char is the 1st char in the escape code SELECT char FROM 'n, 'N, 'r, 'R, 't, 'T, 'b, 'B, 'f, 'F, 'l, 'L, '', '", '\\ => {AddCharPlus[]; RETURN [TRUE]}; IN ['0..'3] => {AddCharPlus[]; IF char IN ['0..'7] THEN {AddCharPlus[]; IF char IN ['0..'7] THEN {AddCharPlus[]; RETURN [TRUE]}}}; ENDCASE; token.msg ← "invalid escape code"; RETURN [FALSE]; }; char: CHAR ← get[index]; index ← index + 1; token.msg ← NIL; DO -- at start of token (we think) {lag: CHAR ← char; SELECT char FROM ControlZ => {DO -- skip over the Bravo garbage NextChar[]; SELECT char FROM NUL => IF lag = NUL THEN GO TO EndOfFile; CR, ControlCR => EXIT; ENDCASE; lag ← char; ENDLOOP; }; IN [NUL..' ], ControlCR => {NextChar[]; IF char = NUL AND lag = NUL THEN GO TO EndOfFile}; IN ['a..'z], IN ['A..'Z] => {FirstChar[tokenID]; DO SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {AddCharPlus[]; LOOP}; ENDCASE => RETURN; ENDLOOP; }; '0, '1, '2, '3, '4, '5, '6, '7, '8, '9 => {-- accept the first run of digits card: CARDINAL ← char - '0; octalValid: BOOL ← card < 8; small: BOOL ← octalValid; FirstChar[tokenINT]; WHILE char IN ['0..'9] DO octalValid ← octalValid AND char IN ['0..'7]; card ← card * 8 + (char-'0); small ← small AND octalValid AND card < 400B; AddCharPlus[]; ENDLOOP; IF char = '. THEN {-- determine whether there is a fraction NextChar[]; IF char = '. THEN RETURN; -- no fraction token.kind ← tokenREAL; AddChar[]; WHILE char IN ['0..'9] DO AddCharPlus[]; ENDLOOP}; SELECT char FROM 'e, 'E => {-- accept the exponent AddCharPlus[]; IF char = '- OR char = '+ THEN AddCharPlus[]; WHILE char IN ['0..'9] DO AddCharPlus[]; ENDLOOP; token.kind ← tokenREAL}; 'b, 'B => {IF token.kind = tokenREAL THEN RETURN; AddCharPlus[]; IF NOT octalValid THEN {token.msg ← "invalid octal constant"; GO TO Error}}; 'c, 'C => {IF token.kind = tokenREAL THEN RETURN; AddCharPlus[]; IF NOT small THEN {token.msg ← "invalid character code"; GO TO Error}; token.kind ← tokenCHAR}; ENDCASE; RETURN}; ',, ';, ':, '←, '#, '~, '+, '*, '/, '↑, '@, '!, '(, '), '[, '], '{, '} => {FirstChar[tokenSINGLE]; RETURN}; '' => {FirstChar[tokenCHAR]; lag ← char; AddCharPlus[]; IF lag # '\\ THEN RETURN; IF AcceptEscapeCode[] THEN RETURN; GO TO Error; }; '" => {FirstChar[tokenROPE]; DO -- eat up the string/rope literal SELECT char FROM '" => {AddCharPlus[]; IF char # '" THEN EXIT}; '\\ => {AddCharPlus[]; IF AcceptEscapeCode[] THEN LOOP; GO TO Error}; NUL => {AddCharPlus[]; IF char # NUL THEN LOOP; token.msg ← "end-of-file in string literal"; GO TO Error}; ENDCASE; AddCharPlus[]; ENDLOOP; -- accept trailing L (for local frame designation) IF char = 'L OR char = 'l THEN AddCharPlus[]; RETURN; }; '$ => {FirstChar[tokenATOM]; SELECT char FROM IN ['a..'z], IN ['A..'Z] => {}; ENDCASE => {token.msg ← "invalid atom"; GO TO Error}; DO -- accumulate rest of atom name SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {}; ENDCASE => EXIT; AddCharPlus[] ENDLOOP; RETURN}; '- => {-- tokenMINUS or comment processing FirstChar[tokenSINGLE]; IF char # '- THEN RETURN; token.kind ← tokenCOMMENT; lag ← CR; -- now we have started a comment DO AddCharPlus[]; SELECT char FROM CR, ControlCR => GO TO LastOne; '- => IF lag = '- THEN GO TO LastOne; NUL => IF lag = NUL THEN {token.next ← index - 2; RETURN}; ENDCASE; lag ← char; ENDLOOP; }; '. => {-- either a REAL or a dot or a dotdot FirstChar[tokenSINGLE]; IF char = '. THEN GO TO Double; IF char NOT IN ['0..'9] THEN RETURN; token.kind ← tokenREAL; AddCharPlus[]; WHILE char IN ['0..'9] DO AddCharPlus[]; ENDLOOP; IF char = 'E OR char = 'e THEN {-- accept the exponent AddCharPlus[]; IF char = '- OR char = '+ THEN AddCharPlus[]; WHILE char IN ['0..'9] DO AddCharPlus[]; ENDLOOP}; RETURN; }; '= => {-- either '=' or '=>' FirstChar[tokenSINGLE]; IF char = '> THEN GO TO Double ELSE RETURN}; '>, '< => {-- either '>' or '>=' (or '<' or '<=') FirstChar[tokenSINGLE]; IF char = '= THEN GO TO Double ELSE RETURN}; ENDCASE => {FirstChar[tokenERROR]; token.msg ← "invalid character"; RETURN}; EXITS Error => {token.kind ← tokenERROR; RETURN}; Double => {token.kind ← tokenDOUBLE; token.next ← index; RETURN}; LastOne => {token.next ← index; RETURN}; EndOfFile => {token.start ← token.next ← index - 2; token.kind ← tokenEOF; RETURN}}; ENDLOOP; }; RealFromToken: PUBLIC PROC [get: GetProcType, token: Token] RETURNS [REAL] = { -- takes a Token into a REAL (parses the literal) -- signals WrongKind if token.kind # tokenREAL index: INT ← token.start; gp: PROC RETURNS [c: CHAR] = { c ← get[index]; index ← index + 1; }; IF token.kind # tokenREAL THEN ERROR WrongKind; RETURN [Real.ReadReal[gp]]; }; IntFromToken: PUBLIC PROC [get: GetProcType, token: Token] RETURNS [x: INT] = { -- takes a Token into a INT (parses the literal) -- signals WrongKind if token.kind # tokenINT base: NAT ← 10; lc: CHAR ← 0C; end: INT ← token.next - 1; over: INT ← LastIntDiv10; IF token.kind # tokenINT THEN ERROR WrongKind; lc ← get[end]; x ← 0; IF lc = 'b OR lc = 'B THEN {base ← 8; end ← end - 1; over ← LastCardDiv8}; FOR i: INT IN [token.start..end] DO IF x > over THEN ERROR IntegerOverflow; x ← x * base + (get[i] - '0); IF x < 0 AND base = 10 THEN ERROR IntegerOverflow; ENDLOOP; }; CharFromToken: PUBLIC PROC [get: GetProcType, token: Token] RETURNS [c: CHAR] = { -- takes a Token into a CHAR (parses the literal) -- signals WrongKind if token.kind # tokenCHAR IF token.kind # tokenCHAR THEN ERROR WrongKind; IF get[token.start] = '' THEN IF (c ← get[token.start+1]) = '\\ THEN RETURN [ParseEscapeCode[get, token.start+2].c] ELSE RETURN; RETURN [ParseEscapeCode[get, token.start].c]; }; RopeFromToken: PUBLIC PROC [get: GetProcType, token: Token] RETURNS [new: ROPE] = { -- takes a Token into a ROPE (parses the literal) -- signals WrongKind if token.kind # tokenROPE index: INT ← token.start + 1; end: INT ← token.next - 1; escaped: BOOL ← FALSE; res: INT ← 0; gp: PROC RETURNS [c: CHAR] = { IF index >= end THEN RETURN [0C]; c ← get[index]; index ← index + 1; res ← res + 1; IF c = '" THEN {index ← index + 1; escaped ← TRUE}; IF c # '\\ THEN RETURN; [c, index] ← ParseEscapeCode[get, index+1]; escaped ← TRUE}; IF token.kind # tokenROPE THEN ERROR WrongKind; IF get[end] = '" THEN end ← end - 1; new ← Rope.FromProc[end - index, gp]; IF escaped THEN new ← new.Substr[0, res]; }; AtomFromToken: PUBLIC PROC [get: GetProcType, token: Token] RETURNS [ATOM] = { -- takes a Token into a ATOM (parses the literal) -- signals WrongKind if token.kind # tokenATOM IF token.kind # tokenATOM THEN ERROR WrongKind; RETURN[Atom.MakeAtom[ExtractRope[get, token.start+1, token.next]]]; }; ContentsFromToken: PUBLIC PROC [get: GetProcType, token: Token] RETURNS [ROPE] = { -- gets the contents of the token as a ROPE -- can be used on any token RETURN [ExtractRope[get, token.start, token.next]]; }; WrongKind: PUBLIC ERROR = CODE; IntegerOverflow: PUBLIC ERROR = CODE; -- utility routines ExtractRope: PROC [get: GetProcType, start,end: INT] RETURNS [ROPE] = { size: INT ← end - start; gp: PROC RETURNS [c: CHAR] = { c ← get[start]; start ← start + 1}; RETURN[Rope.FromProc[size, gp]]; }; ParseEscapeCode: PROC [get: GetProcType, index: INT] RETURNS [c: CHAR, next: INT] = { c ← get[index]; next ← index + 1; SELECT c FROM 'n, 'N => c ← Ascii.CR; 'r, 'R => c ← Ascii.CR; 't, 'T => c ← Ascii.TAB; 'b, 'B => c ← Ascii.BS; 'f, 'F => c ← Ascii.FF; 'l, 'L => c ← Ascii.LF; IN ['0..'3] => {cc: CHAR; c ← 0C + (c-'0); cc ← get[next]; IF cc IN ['0..'7] THEN {c ← 0C + ((cc-'0) + (c-0C)*10B); cc ← get[next ← next + 1]; IF cc IN ['0..'7] THEN {c ← 0C + ((cc-'0) + (c-0C)*10B); next ← next + 1}}}; ENDCASE; }; -- test facilities TestResults: TYPE = RECORD [token: Token, contents: ROPE, literal: REF]; Test: PROC [rope: ROPE] RETURNS [list: LIST OF TestResults] = { get: GetProcType = { IF index < rope.Size[] THEN RETURN [rope.Fetch[index]]; RETURN [NUL]; }; tget: GetProcType = { c: CHAR ← get[index]; IF c = '& THEN c ← 'A; RETURN [c]; }; index: INT ← 0; lag, temp: LIST OF TestResults ← NIL; list ← NIL; DO token: Token ← GetToken[tget, index]; literal: REF ← NIL; IF token.kind = tokenEOF THEN EXIT; index ← token.next; SELECT token.kind FROM tokenINT => literal ← NEW[INT ← IntFromToken[get, token]]; tokenREAL => literal ← NEW[REAL ← RealFromToken[get, token]]; tokenCHAR => literal ← NEW[CHAR ← CharFromToken[get, token]]; tokenROPE => literal ← RopeFromToken[get, token]; tokenATOM => literal ← AtomFromToken[get, token]; ENDCASE; temp ← LIST[[token, ContentsFromToken[get, token], literal]]; IF lag = NIL THEN list ← temp ELSE lag.rest ← temp; lag ← temp; ENDLOOP; }; END.