DIRECTORY Ascii USING [BS, CR, FF, LF, NUL, TAB], Atom USING [MakeAtom], CedarScanner USING [GetClosure, GetProc, Token, TokenKind], Real USING [ReadReal], Rope USING [Fetch, FromProc, ROPE, Size, Substr]; CedarScannerImpl: CEDAR PROGRAM IMPORTS Atom, Real, Rope EXPORTS CedarScanner SHARES Rope = BEGIN OPEN Ascii, CedarScanner, Rope; ControlZ: CHAR = 32C; -- Bravo escape char ControlCR: CHAR = 215C; -- control CR (Bravo relic) LastIntDiv10: INT = LAST[INT]/10; LastCardDiv8: INT = LAST[LONG CARDINAL]/8; GetToken: PUBLIC PROC [get: GetClosure,index: INT] RETURNS [token: Token] = { FirstChar: PROC [kind: TokenKind] = { token.kind _ kind; token.start _ index - 1; token.next _ index; char _ get.proc[get.data, index]; index _ index + 1; }; AddChar: PROC [] = { token.next _ index}; AddCharPlus: PROC [] = { token.next _ index; char _ get.proc[get.data, index]; index _ index + 1}; NextChar: PROC = { char _ get.proc[get.data, index]; index _ index + 1}; AcceptEscapeCode: PROC RETURNS [BOOL] = { SELECT char FROM 'n, 'N, 'r, 'R, 't, 'T, 'b, 'B, 'f, 'F, 'l, 'L, '', '", '\\ => {AddCharPlus[]; RETURN [TRUE]}; IN ['0..'3] => {AddCharPlus[]; IF char IN ['0..'7] THEN {AddCharPlus[]; IF char IN ['0..'7] THEN {AddCharPlus[]; RETURN [TRUE]}}}; ENDCASE; token.msg _ "invalid escape code"; RETURN [FALSE]; }; char: CHAR _ get.proc[get.data, index]; index _ index + 1; token.msg _ NIL; DO -- at start of token (we think) {lag: CHAR _ char; SELECT char FROM ControlZ => {DO -- skip over the Bravo garbage NextChar[]; SELECT char FROM NUL => IF lag = NUL THEN GO TO EndOfFile; CR, ControlCR => EXIT; ENDCASE; lag _ char; ENDLOOP; }; IN [NUL..' ], ControlCR => {NextChar[]; IF char = NUL AND lag = NUL THEN GO TO EndOfFile}; IN ['a..'z], IN ['A..'Z] => {FirstChar[tokenID]; DO SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {AddCharPlus[]; LOOP}; ENDCASE => RETURN; ENDLOOP; }; '0, '1, '2, '3, '4, '5, '6, '7, '8, '9 => {-- accept the first run of digits card: CARDINAL _ char - '0; octalValid: BOOL _ card < 8; small: BOOL _ octalValid; hexLast: INT _ -1; FirstChar[tokenINT]; WHILE char IN ['0..'9] DO octalValid _ octalValid AND char IN ['0..'7]; card _ card * 8 + (char-'0); small _ small AND octalValid AND card < 400B; AddCharPlus[]; ENDLOOP; hexLast _ index-1; DO tc: CHAR _ get.proc[get.data, hexLast]; SELECT tc FROM IN ['0..'9], IN ['A..'F], IN ['a..'f] => {hexLast _ hexLast + 1; LOOP}; 'X, 'x, 'H, 'h => {-- a hex constant! token.next _ hexLast + 1; RETURN; }; ENDCASE => EXIT; ENDLOOP; IF char = '. THEN {-- determine whether there is a fraction NextChar[]; IF char = '. THEN RETURN; -- no fraction token.kind _ tokenREAL; AddChar[]; WHILE char IN ['0..'9] DO AddCharPlus[]; ENDLOOP}; SELECT char FROM 'e, 'E => {-- accept the exponent AddCharPlus[]; IF char = '- OR char = '+ THEN AddCharPlus[]; WHILE char IN ['0..'9] DO AddCharPlus[]; ENDLOOP; token.kind _ tokenREAL}; 'b, 'B => {IF token.kind = tokenREAL THEN RETURN; AddCharPlus[]; IF NOT octalValid THEN {token.msg _ "invalid octal constant"; GO TO Error}}; 'c, 'C => {IF token.kind = tokenREAL THEN RETURN; AddCharPlus[]; IF NOT small OR card > 377B THEN {token.msg _ "invalid character code"; GO TO Error}; token.kind _ tokenCHAR}; ENDCASE; RETURN}; ',, ';, ':, '_, '#, '~, '+, '*, '/, '^, '@, '!, '(, '), '[, '], '{, '} => {FirstChar[tokenSINGLE]; RETURN}; '' => {FirstChar[tokenCHAR]; lag _ char; AddCharPlus[]; IF lag = NUL AND char = NUL THEN {token.msg _ "end-of-file in char literal"; GOTO Error; }; IF lag # '\\ THEN RETURN; IF AcceptEscapeCode[] THEN RETURN; GO TO Error; }; '" => {FirstChar[tokenROPE]; DO -- eat up the string/rope literal SELECT char FROM '" => {AddCharPlus[]; IF char # '" THEN EXIT}; '\\ => {AddCharPlus[]; IF AcceptEscapeCode[] THEN LOOP; GO TO Error}; NUL => {AddCharPlus[]; IF char # NUL THEN LOOP; token.msg _ "end-of-file in string literal"; GO TO Error}; ENDCASE; AddCharPlus[]; ENDLOOP; IF char = 'L OR char = 'l THEN AddCharPlus[]; RETURN; }; '$ => {FirstChar[tokenATOM]; SELECT char FROM IN ['a..'z], IN ['A..'Z] => {}; ENDCASE => {token.msg _ "invalid atom"; GO TO Error}; DO -- accumulate rest of atom name SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {}; ENDCASE => EXIT; AddCharPlus[] ENDLOOP; RETURN}; '- => {-- tokenMINUS or comment processing FirstChar[tokenSINGLE]; IF char # '- THEN RETURN; token.kind _ tokenCOMMENT; lag _ CR; DO AddCharPlus[]; SELECT char FROM CR, ControlCR => GO TO LastOne; '- => IF lag = '- THEN GO TO LastOne; NUL => IF lag = NUL THEN {token.next _ index - 2; RETURN}; ENDCASE; lag _ char; ENDLOOP; }; '. => {-- either a REAL or a dot or a dotdot FirstChar[tokenSINGLE]; IF char = '. THEN GO TO Double; IF char NOT IN ['0..'9] THEN RETURN; token.kind _ tokenREAL; AddCharPlus[]; WHILE char IN ['0..'9] DO AddCharPlus[]; ENDLOOP; IF char = 'E OR char = 'e THEN {-- accept the exponent AddCharPlus[]; IF char = '- OR char = '+ THEN AddCharPlus[]; WHILE char IN ['0..'9] DO AddCharPlus[]; ENDLOOP}; RETURN; }; '= => {-- either '=' or '=>' FirstChar[tokenSINGLE]; IF char = '> THEN GO TO Double ELSE RETURN}; '>, '< => {-- either '>' or '>=' (or '<' or '<=') FirstChar[tokenSINGLE]; IF char = '= THEN GO TO Double ELSE RETURN}; ENDCASE => {FirstChar[tokenERROR]; token.msg _ "invalid character"; RETURN}; EXITS Error => {token.kind _ tokenERROR; RETURN}; Double => {token.kind _ tokenDOUBLE; token.next _ index; RETURN}; LastOne => {token.next _ index; RETURN}; EndOfFile => {token.start _ token.next _ index - 2; token.kind _ tokenEOF; RETURN}}; ENDLOOP; }; RealFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [REAL] = TRUSTED { index: INT _ token.start; gp: PROC RETURNS [c: CHAR] = TRUSTED { c _ get.proc[get.data, index]; index _ index + 1; }; IF token.kind # tokenREAL THEN ERROR WrongKind; RETURN [Real.ReadReal[gp]]; }; IntFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [x: INT _ 0] = { base: NAT _ 10; lc: CHAR _ 0C; end: INT _ token.next - 1; over: INT _ LastIntDiv10; IF token.kind # tokenINT THEN ERROR WrongKind; lc _ get.proc[get.data, end]; SELECT lc FROM 'H, 'h, 'X, 'x => {FOR i: INT IN [token.start..end-1] DO c: CHAR _ get.proc[get.data, i]; SELECT c FROM IN ['0..'9] => x _ x * 16 + (get.proc[get.data, i] - '0); IN ['A..'F] => x _ x * 16 + (get.proc[get.data, i] - 'A) + 10; IN ['a..'f] => x _ x * 16 + (get.proc[get.data, i] - 'a) + 10; ENDCASE; ENDLOOP; RETURN}; 'B, 'b => {base _ 8; end _ end - 1; over _ LastCardDiv8}; ENDCASE; FOR i: INT IN [token.start..end] DO IF x > over THEN ERROR IntegerOverflow; x _ x * base + (get.proc[get.data, i] - '0); IF x < 0 AND base = 10 THEN ERROR IntegerOverflow; ENDLOOP; }; CharFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [c: CHAR] = { x: CARDINAL _ 0; IF token.kind # tokenCHAR THEN ERROR WrongKind; IF get.proc[get.data, token.start] = '' THEN IF (c _ get.proc[get.data, token.start+1]) = '\\ THEN RETURN [ParseEscapeCode[get, token.start+2].c] ELSE RETURN; FOR i: INT IN [token.start..token.next - 2] DO tc: CHAR _ get.proc[get.data, i]; x _ x * 8 + (tc - '0); ENDLOOP; c _ x + 0C; }; SingleFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [c: CHAR] = { IF token.kind # tokenSINGLE THEN ERROR WrongKind; c _ get.proc[get.data, token.start]; }; RopeFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [new: ROPE] = { index: INT _ token.start + 1; end: INT _ token.next - 1; escaped: BOOL _ FALSE; res: INT _ 0; gp: PROC RETURNS [c: CHAR] = { IF index > end THEN RETURN [0C]; c _ get.proc[get.data, index]; index _ index + 1; res _ res + 1; SELECT c FROM '\\ => {[c, index] _ ParseEscapeCode[get, index]; escaped _ TRUE}; '" => {index _ index + 1; escaped _ TRUE}; ENDCASE; }; IF token.kind # tokenROPE THEN ERROR WrongKind; SELECT get.proc[get.data, end] FROM 'l, 'L => end _ end - 2; '" => end _ end - 1; ENDCASE; new _ Rope.FromProc[end - index + 1, gp]; IF escaped THEN new _ new.Substr[0, res]; }; AtomFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [ATOM] = { IF token.kind # tokenATOM THEN ERROR WrongKind; RETURN[Atom.MakeAtom[ExtractRope[get, token.start+1, token.next]]]; }; ContentsFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [ROPE] = { RETURN [ExtractRope[get, token.start, token.next]]; }; WrongKind: PUBLIC ERROR = CODE; IntegerOverflow: PUBLIC ERROR = CODE; ExtractRope: PROC [get: GetClosure, start,end: INT] RETURNS [ROPE] = { size: INT _ end - start; gp: PROC RETURNS [c: CHAR] = { c _ get.proc[get.data, start]; start _ start + 1}; RETURN[Rope.FromProc[size, gp]]; }; ParseEscapeCode: PROC [get: GetClosure, index: INT] RETURNS [c: CHAR, next: INT] = { c _ get.proc[get.data, index]; next _ index + 1; SELECT c FROM 'n, 'N => c _ Ascii.CR; 'r, 'R => c _ Ascii.CR; 't, 'T => c _ Ascii.TAB; 'b, 'B => c _ Ascii.BS; 'f, 'F => c _ Ascii.FF; 'l, 'L => c _ Ascii.LF; IN ['0..'3] => {cc: CHAR; c _ 0C + (c-'0); cc _ get.proc[get.data, next]; IF cc IN ['0..'7] THEN {c _ 0C + ((cc-'0) + (c-0C)*10B); cc _ get.proc[get.data, next _ next + 1]; IF cc IN ['0..'7] THEN {c _ 0C + ((cc-'0) + (c-0C)*10B); next _ next + 1}}}; ENDCASE; }; TestResults: TYPE = RECORD [token: Token, contents: ROPE, literal: REF]; get: GetProc = { rope: ROPE = NARROW[data]; IF index < rope.Size[] THEN RETURN [rope.Fetch[index]]; RETURN [NUL]; }; getClosure: GetClosure _ [get]; tget: GetProc = { c: CHAR _ get[getClosure.data, index]; IF c = '& THEN c _ 'A; RETURN [c]; }; tgetClosure: GetClosure _ [tget]; Test: PROC [rope: ROPE] RETURNS [list: LIST OF TestResults] = { index: INT _ 0; lag, temp: LIST OF TestResults _ NIL; getClosure.data _ rope; tgetClosure.data _ rope; list _ NIL; DO token: Token _ GetToken[tgetClosure, index]; literal: REF _ NIL; IF token.kind = tokenEOF THEN EXIT; index _ token.next; SELECT token.kind FROM tokenINT => literal _ NEW[INT _ IntFromToken[getClosure, token]]; tokenREAL => literal _ NEW[REAL _ RealFromToken[getClosure, token]]; tokenCHAR => literal _ NEW[CHAR _ CharFromToken[getClosure, token]]; tokenROPE => literal _ RopeFromToken[getClosure, token]; tokenATOM => literal _ AtomFromToken[getClosure, token]; ENDCASE; temp _ LIST[[token, ContentsFromToken[getClosure, token], literal]]; IF lag = NIL THEN list _ temp ELSE lag.rest _ temp; lag _ temp; ENDLOOP; }; END. ŠCedarScannerImpl.mesa Russ Atkinson, November 15, 1982 1:03 pm Last Edited by: Teitelman, January 24, 1983 1:32 pm accept the current char, set the kind and grab the next character just accept the current character accept the current character & get the next one grab the next char, but don't add it to the token assume that the last char accepted was '\\ and that char is the 1st char in the escape code very special scan to determine if this is hex accept trailing L (for local frame designation) now we have started a comment takes a Token into a REAL (parses the literal) signals WrongKind if token.kind # tokenREAL takes a Token into a INT (parses the literal) signals WrongKind if token.kind # tokenINT takes a Token into a CHAR (parses the literal) signals WrongKind if token.kind # tokenCHAR takes a Token into a CHAR signals WrongKind if token.kind # tokenSINGLE takes a Token into a ROPE (parses the literal) signals WrongKind if token.kind # tokenROPE takes a Token into a ATOM (parses the literal) signals WrongKind if token.kind # tokenATOM gets the contents of the token as a ROPE can be used on any token utility routines test facilities Ê;˜J˜Jšœ™Jšœ(™(J™3šÏk ˜ Jšœœœœœœœœ˜'Jšœœ ˜Jšœ œ)˜;Jšœœ ˜Jšœœœ˜1J˜—JšÐblœœ˜J˜Jšœ˜J˜Jšœ ˜J˜Jšœ˜ J˜šœœœ˜'J˜—Jšœ œÏc˜*Jšœ œ Ÿ˜4J˜Jšœœœœ˜!š œœœœœ˜*J˜—J˜š Ïnœœœœœ˜MJ˜š  œœ˜%Jšœ%™%Jšœ™J˜J˜J˜Jšœ!˜!J˜J˜J˜—š œœ˜Jšœ!™!J˜J˜—š  œœ˜Jšœ/™/J˜J˜!J˜J˜—š œœ˜Jšœ1™1J˜!J˜J˜—š œœœœ˜)Jšœ*™*Jšœ0™0šœ˜J˜˜Jšœœœ˜—šœ ˜˜šœœ ˜˜šœœ ˜˜Jšœœ˜—————Jšœ˜——J˜"Jšœœ˜J˜J˜—Jšœœ˜'J˜Jšœ œ˜J˜šœŸ˜"šœœ˜šœ˜˜ šœœŸ˜"J˜ šœ˜Jš œœœœœœ ˜)Jšœœ˜Jšœ˜—J˜ Jšœ˜J˜——šœœ˜˜ Jšœœœœœœœ ˜2——šœ œ ˜˜š˜šœ˜šœ œ œ ˜(Jšœœ˜—Jšœœ˜—Jšœ˜—J˜——˜*šœŸ"˜#Jšœœ ˜Jšœ œ ˜Jšœœ˜Jšœ œ˜J˜šœœ ˜Jšœœœ ˜-J˜Jšœœ œ ˜-J˜Jšœ˜ —Jšœ-™-J˜š˜Jšœœ˜'šœ˜šœ œ œ ˜(Jšœœ˜—˜šœŸ˜J˜Jšœ˜J˜——Jšœœ˜—Jšœ˜ —šœ œ˜šœŸ(˜)J˜ Jšœ œœŸ˜)J˜J˜ šœœ ˜J˜Jšœ˜ ———šœœ˜˜ šœŸ˜J˜Jšœ œ œ˜.šœœ ˜J˜Jšœ˜ —J˜——˜ šœœœœ˜'J˜šœœ ˜Jšœ'œœ ˜5———˜ šœœœœ˜'J˜šœœœ ˜ Jšœ'œœ˜4—J˜——Jšœ˜ —Jšœ˜J˜——˜JJšœœ˜!J˜—˜˜J˜ J˜š œœœœœ˜!Jšœ+˜+Jšœ˜ Jšœ˜—Jšœ œœ˜Jšœœœ˜"Jšœœ˜ J˜J˜——˜˜šœŸ!˜$šœœ˜˜˜Jšœ œœ˜——˜˜Jšœœœ˜ Jšœœ ˜——šœ˜˜Jšœœœœ˜J˜,Jšœœ˜ ——Jšœ˜—J˜Jšœ˜—Jšœ/™/Jšœ œ œ˜-Jšœ˜J˜J˜——˜˜šœœ˜Jšœ œ˜šœ˜ Jšœœœ ˜+——šœŸ˜"šœœ˜Jšœ œ œ˜,Jšœœ˜—J˜ Jšœ˜ —Jšœ˜J˜——šœŸ#˜*J˜Jšœ œœ˜J˜Jšœœ˜ Jšœ™š˜J˜šœ˜Jšœœœ ˜Jš œœ œœœ ˜%šœœœ˜˜Jšœ˜——Jšœ˜—J˜ Jšœ˜—J˜J˜—˜šœŸ%˜&J˜Jšœ œœœ˜Jš œœœ œœ˜$J˜J˜šœœ ˜J˜Jšœ˜—šœ œ ˜šœŸ˜J˜Jšœ œ œ˜-šœœ ˜J˜Jšœ˜ ———Jšœ˜J˜J˜——˜šœŸ˜J˜šœ ˜ Jšœœœ˜Jšœœ˜ J˜———˜ šœŸ&˜'J˜šœ ˜ Jšœœœ˜Jšœœ˜ J˜———šœ˜ ˜J˜ Jšœ˜———š˜˜˜Jšœ˜—˜ ˜J˜Jšœ˜———˜ ˜Jšœ˜——˜ ˜&J˜Jšœ˜ J˜————Jšœ˜ —J˜J˜—š   œœœ!œœœ˜UJšœ.™.Jšœ+™+Jšœœ˜š œœœœœ˜&J˜J˜J˜—Jšœœœ ˜/Jšœ˜J˜J˜—š   œœœ!œœ ˜RJšœ-™-Jšœ*™*Jšœœ˜Jšœœ˜Jšœœ˜Jšœœ˜Jšœœœ ˜.J˜šœ˜˜šœœœœ˜&Jšœœ˜ šœ˜ Jšœ8˜:Jšœ=˜?Jšœ<˜>Jšœ˜ —Jšœ˜Jšœ˜——˜ J˜/—Jšœ˜—šœœœ˜#Jšœ œœ˜'J˜-Jšœœ œœ˜3Jšœ˜—J˜J˜—š   œœœ!œœ˜PJšœ.™.Jšœ+™+Jšœœ˜Jšœœœ ˜/šœ&˜,šœ.˜0Jšœœ(˜3Jšœœ˜ ——šœœœ˜.Jšœœ˜!J˜Jšœ˜—J˜ J˜J˜—š  œœœ!œœ˜RJšœ™Jšœ-™-Jšœœœ ˜1J˜$J˜J˜—š   œœœ!œœ˜RJšœ.™.Jšœ+™+Jšœœ˜Jšœœ˜Jšœ œœ˜Jšœœ˜ šœœœœ˜Jšœ œœ˜ J˜J˜J˜šœ˜ Jšœ<œ˜BJšœ$œ˜*Jšœ˜—J˜—Jšœœœ ˜/šœ˜#J˜J˜Jšœ˜—J˜)Jšœ œ˜)J˜J˜—š   œœœ!œœ˜MJšœ.™.Jšœ+™+Jšœœœ ˜/Jšœ=˜CJ˜J˜—š  œœœ!œœ˜QJšœ(™(Jšœ™Jšœ-˜3J˜J˜—Jš  œœœœ˜J˜š œœœœ˜%J˜—J™Jšœ™J˜š   œœœœœ˜FJšœœ˜šœœœœ˜J˜J˜—Jšœ˜ J˜J˜—š œœœ˜3Jšœœœ˜ J˜J˜šœ˜ Jšœœ˜Jšœœ˜Jšœœ˜Jšœœ˜Jšœœ˜Jšœœ˜šœ ˜šœœ˜ J˜J˜šœœ ˜˜!˜)šœœ ˜˜!J˜———————Jšœ˜—J˜J˜—J™Jšœ™J˜š œ œœœ œ˜HJ˜—š œ ˜Jšœœœ˜Jšœœœ˜7Jšœœ˜ J˜—Jš  œ˜š œ ˜Jšœœ˜&Jšœœ˜Jšœ˜ J˜—Jš  œ˜!J˜š  œœœœœœ˜?Jšœœ˜Jšœ œœœ˜%Jšœ˜Jšœ˜Jšœœ˜ š˜Jšœ,˜,Jšœ œœ˜Jšœœœ˜#J˜šœ ˜Jšœœœ$˜AJšœœœ%˜DJšœœœ%˜DJšœ8˜8Jšœ8˜8Jšœ˜—Jšœœ9˜DJšœœœ œ˜3J˜ Jšœ˜ —J˜J˜—šœ˜J˜J˜J˜——…—(\>!