-- file MesaScannerImpl.mesa -- last edit by Russ Atkinson, February 9, 1982 6:29 pm DIRECTORY Ascii: TYPE USING [BS, CR, FF, LF, NUL, TAB], Atom: TYPE USING [MakeAtom], MesaScanner: TYPE USING [GetProcType, Token, TokenKind], Real: TYPE USING [ReadReal], Rope: TYPE USING [Fetch, FromProc, ROPE, Size, Substr]; MesaScannerImpl: PROGRAM IMPORTS Atom, Real, Rope EXPORTS MesaScanner SHARES Rope = BEGIN OPEN Ascii, MesaScanner, Rope; ControlZ: CHAR = 32C; -- Bravo escape char ControlCR: CHAR = 215C; -- control CR (Bravo relic) LastIntDiv10: INT = LAST[INT]/10; LastCardDiv8: INT = LAST[LONG CARDINAL]/8; GetToken: PUBLIC PROC [get: GetProcType,index: INT] RETURNS [token: Token] = { FirstChar: PROC [kind: TokenKind] = { -- accept the current char, set the kind -- and grab the next character token.kind _ kind; token.start _ index - 1; token.next _ index; char _ get[index]; index _ index + 1; }; AddChar: PROC [] = { -- just accept the current character token.next _ index}; AddCharPlus: PROC [] = { -- accept the current character & get the next one token.next _ index; char _ get[index]; index _ index + 1}; NextChar: PROC = { -- grab the next char, but don't add it to the token char _ get[index]; index _ index + 1}; AcceptEscapeCode: PROC RETURNS [BOOL] = { -- assume that the last char accepted was '\\ -- and that char is the 1st char in the escape code SELECT char FROM 'n, 'N, 'r, 'R, 't, 'T, 'b, 'B, 'f, 'F, 'l, 'L, '', '", '\\ => {AddCharPlus[]; RETURN [TRUE]}; IN ['0..'3] => {AddCharPlus[]; IF char IN ['0..'7] THEN {AddCharPlus[]; IF char IN ['0..'7] THEN {AddCharPlus[]; RETURN [TRUE]}}}; ENDCASE; token.msg _ "invalid escape code"; RETURN [FALSE]; }; char: CHAR _ get[index]; index _ index + 1; token.msg _ NIL; DO -- at start of token (we think) {lag: CHAR _ char; SELECT char FROM ControlZ => {DO -- skip over the Bravo garbage NextChar[]; SELECT char FROM NUL => IF lag = NUL THEN GO TO EndOfFile; CR, ControlCR => EXIT; ENDCASE; lag _ char; ENDLOOP; }; IN [NUL..' ], ControlCR => {NextChar[]; IF char = NUL AND lag = NUL THEN GO TO EndOfFile}; IN ['a..'z], IN ['A..'Z] => {FirstChar[tokenID]; DO SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {AddCharPlus[]; LOOP}; ENDCASE => RETURN; ENDLOOP; }; '0, '1, '2, '3, '4, '5, '6, '7, '8, '9 => {-- accept the first run of digits card: CARDINAL _ char - '0; octalValid: BOOL _ card < 8; small: BOOL _ octalValid; FirstChar[tokenINT]; WHILE char IN ['0..'9] DO octalValid _ octalValid AND char IN ['0..'7]; card _ card * 8 + (char-'0); small _ small AND octalValid AND card < 400B; AddCharPlus[]; ENDLOOP; IF char = '. THEN {-- determine whether there is a fraction NextChar[]; IF char = '. THEN RETURN; -- no fraction token.kind _ tokenREAL; AddChar[]; WHILE char IN ['0..'9] DO AddCharPlus[]; ENDLOOP}; SELECT char FROM 'e, 'E => {-- accept the exponent AddCharPlus[]; IF char = '- OR char = '+ THEN AddCharPlus[]; WHILE char IN ['0..'9] DO AddCharPlus[]; ENDLOOP; token.kind _ tokenREAL}; 'b, 'B => {IF token.kind = tokenREAL THEN RETURN; AddCharPlus[]; IF NOT octalValid THEN {token.msg _ "invalid octal constant"; GO TO Error}}; 'c, 'C => {IF token.kind = tokenREAL THEN RETURN; AddCharPlus[]; IF NOT small THEN {token.msg _ "invalid character code"; GO TO Error}; token.kind _ tokenCHAR}; ENDCASE; RETURN}; ',, ';, ':, '_, '#, '~, '+, '*, '/, '^, '@, '!, '(, '), '[, '], '{, '} => {FirstChar[tokenSINGLE]; RETURN}; '' => {FirstChar[tokenCHAR]; lag _ char; AddCharPlus[]; IF lag # '\\ THEN RETURN; IF AcceptEscapeCode[] THEN RETURN; GO TO Error; }; '" => {FirstChar[tokenROPE]; DO -- eat up the string/rope literal SELECT char FROM '" => {AddCharPlus[]; IF char # '" THEN EXIT}; '\\ => {AddCharPlus[]; IF AcceptEscapeCode[] THEN LOOP; GO TO Error}; NUL => {AddCharPlus[]; IF char # NUL THEN LOOP; token.msg _ "end-of-file in string literal"; GO TO Error}; ENDCASE; AddCharPlus[]; ENDLOOP; -- accept trailing L (for local frame designation) IF char = 'L OR char = 'l THEN AddCharPlus[]; RETURN; }; '$ => {FirstChar[tokenATOM]; SELECT char FROM IN ['a..'z], IN ['A..'Z] => {}; ENDCASE => {token.msg _ "invalid atom"; GO TO Error}; DO -- accumulate rest of atom name SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {}; ENDCASE => EXIT; AddCharPlus[] ENDLOOP; RETURN}; '- => {-- tokenMINUS or comment processing FirstChar[tokenSINGLE]; IF char # '- THEN RETURN; token.kind _ tokenCOMMENT; lag _ CR; -- now we have started a comment DO AddCharPlus[]; SELECT char FROM CR, ControlCR => GO TO LastOne; '- => IF lag = '- THEN GO TO LastOne; NUL => IF lag = NUL THEN {token.next _ index - 2; RETURN}; ENDCASE; lag _ char; ENDLOOP; }; '. => {-- either a REAL or a dot or a dotdot FirstChar[tokenSINGLE]; IF char = '. THEN GO TO Double; IF char NOT IN ['0..'9] THEN RETURN; token.kind _ tokenREAL; AddCharPlus[]; WHILE char IN ['0..'9] DO AddCharPlus[]; ENDLOOP; IF char = 'E OR char = 'e THEN {-- accept the exponent AddCharPlus[]; IF char = '- OR char = '+ THEN AddCharPlus[]; WHILE char IN ['0..'9] DO AddCharPlus[]; ENDLOOP}; RETURN; }; '= => {-- either '=' or '=>' FirstChar[tokenSINGLE]; IF char = '> THEN GO TO Double ELSE RETURN}; '>, '< => {-- either '>' or '>=' (or '<' or '<=') FirstChar[tokenSINGLE]; IF char = '= THEN GO TO Double ELSE RETURN}; ENDCASE => {FirstChar[tokenERROR]; token.msg _ "invalid character"; RETURN}; EXITS Error => {token.kind _ tokenERROR; RETURN}; Double => {token.kind _ tokenDOUBLE; token.next _ index; RETURN}; LastOne => {token.next _ index; RETURN}; EndOfFile => {token.start _ token.next _ index - 2; token.kind _ tokenEOF; RETURN}}; ENDLOOP; }; RealFromToken: PUBLIC PROC [get: GetProcType, token: Token] RETURNS [REAL] = { -- takes a Token into a REAL (parses the literal) -- signals WrongKind if token.kind # tokenREAL index: INT _ token.start; gp: PROC RETURNS [c: CHAR] = { c _ get[index]; index _ index + 1; }; IF token.kind # tokenREAL THEN ERROR WrongKind; RETURN [Real.ReadReal[gp]]; }; IntFromToken: PUBLIC PROC [get: GetProcType, token: Token] RETURNS [x: INT] = { -- takes a Token into a INT (parses the literal) -- signals WrongKind if token.kind # tokenINT base: NAT _ 10; lc: CHAR _ 0C; end: INT _ token.next - 1; over: INT _ LastIntDiv10; IF token.kind # tokenINT THEN ERROR WrongKind; lc _ get[end]; x _ 0; IF lc = 'b OR lc = 'B THEN {base _ 8; end _ end - 1; over _ LastCardDiv8}; FOR i: INT IN [token.start..end] DO IF x > over THEN ERROR IntegerOverflow; x _ x * base + (get[i] - '0); IF x < 0 AND base = 10 THEN ERROR IntegerOverflow; ENDLOOP; }; CharFromToken: PUBLIC PROC [get: GetProcType, token: Token] RETURNS [c: CHAR] = { -- takes a Token into a CHAR (parses the literal) -- signals WrongKind if token.kind # tokenCHAR IF token.kind # tokenCHAR THEN ERROR WrongKind; IF get[token.start] = '' THEN IF (c _ get[token.start+1]) = '\\ THEN RETURN [ParseEscapeCode[get, token.start+2].c] ELSE RETURN; RETURN [ParseEscapeCode[get, token.start].c]; }; RopeFromToken: PUBLIC PROC [get: GetProcType, token: Token] RETURNS [new: ROPE] = { -- takes a Token into a ROPE (parses the literal) -- signals WrongKind if token.kind # tokenROPE index: INT _ token.start + 1; end: INT _ token.next - 1; escaped: BOOL _ FALSE; res: INT _ 0; gp: PROC RETURNS [c: CHAR] = { IF index >= end THEN RETURN [0C]; c _ get[index]; index _ index + 1; res _ res + 1; IF c = '" THEN {index _ index + 1; escaped _ TRUE}; IF c # '\\ THEN RETURN; [c, index] _ ParseEscapeCode[get, index+1]; escaped _ TRUE}; IF token.kind # tokenROPE THEN ERROR WrongKind; IF get[end] = '" THEN end _ end - 1; new _ Rope.FromProc[end - index, gp]; IF escaped THEN new _ new.Substr[0, res]; }; AtomFromToken: PUBLIC PROC [get: GetProcType, token: Token] RETURNS [ATOM] = { -- takes a Token into a ATOM (parses the literal) -- signals WrongKind if token.kind # tokenATOM IF token.kind # tokenATOM THEN ERROR WrongKind; RETURN[Atom.MakeAtom[ExtractRope[get, token.start+1, token.next]]]; }; ContentsFromToken: PUBLIC PROC [get: GetProcType, token: Token] RETURNS [ROPE] = { -- gets the contents of the token as a ROPE -- can be used on any token RETURN [ExtractRope[get, token.start, token.next]]; }; WrongKind: PUBLIC ERROR = CODE; IntegerOverflow: PUBLIC ERROR = CODE; -- utility routines ExtractRope: PROC [get: GetProcType, start,end: INT] RETURNS [ROPE] = { size: INT _ end - start; gp: PROC RETURNS [c: CHAR] = { c _ get[start]; start _ start + 1}; RETURN[Rope.FromProc[size, gp]]; }; ParseEscapeCode: PROC [get: GetProcType, index: INT] RETURNS [c: CHAR, next: INT] = { c _ get[index]; next _ index + 1; SELECT c FROM 'n, 'N => c _ Ascii.CR; 'r, 'R => c _ Ascii.CR; 't, 'T => c _ Ascii.TAB; 'b, 'B => c _ Ascii.BS; 'f, 'F => c _ Ascii.FF; 'l, 'L => c _ Ascii.LF; IN ['0..'3] => {cc: CHAR; c _ 0C + (c-'0); cc _ get[next]; IF cc IN ['0..'7] THEN {c _ 0C + ((cc-'0) + (c-0C)*10B); cc _ get[next _ next + 1]; IF cc IN ['0..'7] THEN {c _ 0C + ((cc-'0) + (c-0C)*10B); next _ next + 1}}}; ENDCASE; }; -- test facilities TestResults: TYPE = RECORD [token: Token, contents: ROPE, literal: REF]; Test: PROC [rope: ROPE] RETURNS [list: LIST OF TestResults] = { get: GetProcType = { IF index < rope.Size[] THEN RETURN [rope.Fetch[index]]; RETURN [NUL]; }; tget: GetProcType = { c: CHAR _ get[index]; IF c = '& THEN c _ 'A; RETURN [c]; }; index: INT _ 0; lag, temp: LIST OF TestResults _ NIL; list _ NIL; DO token: Token _ GetToken[tget, index]; literal: REF _ NIL; IF token.kind = tokenEOF THEN EXIT; index _ token.next; SELECT token.kind FROM tokenINT => literal _ NEW[INT _ IntFromToken[get, token]]; tokenREAL => literal _ NEW[REAL _ RealFromToken[get, token]]; tokenCHAR => literal _ NEW[CHAR _ CharFromToken[get, token]]; tokenROPE => literal _ RopeFromToken[get, token]; tokenATOM => literal _ AtomFromToken[get, token]; ENDCASE; temp _ LIST[[token, ContentsFromToken[get, token], literal]]; IF lag = NIL THEN list _ temp ELSE lag.rest _ temp; lag _ temp; ENDLOOP; }; END.