<> <> <> DIRECTORY Ascii USING [BS, CR, FF, LF, NUL, TAB], Atom USING [MakeAtom], CedarScanner USING [GetClosure, GetProc, Token, TokenKind], Real USING [ReadReal], Rope USING [Fetch, FromProc, ROPE, Size, Substr]; CedarScannerImpl: CEDAR PROGRAM IMPORTS Atom, Real, Rope EXPORTS CedarScanner SHARES Rope = BEGIN OPEN Ascii, CedarScanner, Rope; <> ControlZ: CHAR = 32C; -- Bravo escape char ControlCR: CHAR = 215C; -- control CR (Bravo relic) LastIntDiv10: INT = LAST[INT]/10; LastCardDiv8: INT = LAST[LONG CARDINAL]/8; GetToken: PUBLIC PROC [get: GetClosure,index: INT] RETURNS [token: Token] = { FirstChar: PROC [kind: TokenKind] = { <> <> token.kind _ kind; token.start _ index - 1; token.next _ index; char _ get.proc[get.data, index]; index _ index + 1; }; AddChar: PROC [] = { <> token.next _ index}; AddCharPlus: PROC [] = { <> token.next _ index; char _ get.proc[get.data, index]; index _ index + 1}; NextChar: PROC = { <> char _ get.proc[get.data, index]; index _ index + 1}; PeekChar: PROC = { <> char _ get.proc[get.data, index]; }; AcceptEscapeCode: PROC RETURNS [BOOL] = { <> <> SELECT char FROM 'n, 'N, 'r, 'R, 't, 'T, 'b, 'B, 'f, 'F, 'l, 'L, '', '", '\\ => { AddCharPlus[]; RETURN [TRUE]}; IN ['0..'3] => { AddCharPlus[]; IF char IN ['0..'7] THEN { AddCharPlus[]; IF char IN ['0..'7] THEN { AddCharPlus[]; RETURN [TRUE]}}}; ENDCASE; token.msg _ "invalid escape code"; RETURN [FALSE]; }; char: CHAR _ get.proc[get.data, index]; index _ index + 1; token.msg _ NIL; DO -- at start of token (we think) {-- start block for exits lag: CHAR _ char; SELECT char FROM ControlZ => { DO -- skip over the Bravo garbage NextChar[]; SELECT char FROM NUL => IF lag = NUL THEN GO TO EndOfFile; CR, ControlCR => EXIT; ENDCASE; lag _ char; ENDLOOP; }; IN [NUL..' ], ControlCR => { NextChar[]; IF char = NUL AND lag = NUL THEN GO TO EndOfFile}; IN ['a..'z], IN ['A..'Z] => { FirstChar[tokenID]; DO SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => { AddCharPlus[]; LOOP}; ENDCASE => RETURN; ENDLOOP; }; '0, '1, '2, '3, '4, '5, '6, '7, '8, '9 => { <> card: CARDINAL _ char - '0; octalValid: BOOL _ card < 8; small: BOOL _ octalValid; hexLast: INT _ -1; FirstChar[tokenINT]; WHILE char IN ['0..'9] DO octalValid _ octalValid AND char IN ['0..'7]; card _ card * 8 + (char-'0); small _ small AND octalValid AND card < 400B; AddCharPlus[]; ENDLOOP; <> hexLast _ index-1; DO tc: CHAR _ get.proc[get.data, hexLast]; SELECT tc FROM IN ['0..'9], IN ['A..'F], IN ['a..'f] => { hexLast _ hexLast + 1; LOOP}; 'X, 'x, 'H, 'h => { <> token.next _ hexLast + 1; RETURN; }; ENDCASE => EXIT; ENDLOOP; IF char = '. THEN { <> PeekChar[]; -- look at the next char, but don't eat it IF char = '. THEN RETURN; -- no fraction, will be a ".." token token.kind _ tokenREAL; AddChar[]; -- accept the original '. WHILE char IN ['0..'9] DO AddCharPlus[]; ENDLOOP}; SELECT char FROM 'e, 'E => { <> AddCharPlus[]; IF char = '- OR char = '+ THEN AddCharPlus[]; WHILE char IN ['0..'9] DO AddCharPlus[]; ENDLOOP; token.kind _ tokenREAL; }; 'b, 'B => { IF token.kind = tokenREAL THEN RETURN; AddCharPlus[]; IF NOT octalValid THEN { token.msg _ "invalid octal constant"; GO TO Error}; }; 'c, 'C => { IF token.kind = tokenREAL THEN RETURN; AddCharPlus[]; IF NOT small OR card > 377B THEN {token.msg _ "invalid character code"; GO TO Error}; token.kind _ tokenCHAR; }; ENDCASE; RETURN; }; ',, ';, ':, '_, '#, '~, '+, '*, '/, '^, '@, '!, '(, '), '[, '], '{, '} => { FirstChar[tokenSINGLE]; RETURN; }; '' => { FirstChar[tokenCHAR]; lag _ char; AddCharPlus[]; IF lag = NUL AND char = NUL THEN {token.msg _ "end-of-file in char literal"; GOTO Error; }; IF lag # '\\ THEN RETURN; IF AcceptEscapeCode[] THEN RETURN; GO TO Error; }; '" => { FirstChar[tokenROPE]; DO -- eat up the string/rope literal SELECT char FROM '" => { AddCharPlus[]; IF char # '" THEN EXIT}; '\\ => { AddCharPlus[]; IF AcceptEscapeCode[] THEN LOOP; GO TO Error}; NUL => { AddCharPlus[]; IF char # NUL THEN LOOP; token.msg _ "end-of-file in string literal"; GO TO Error}; ENDCASE; AddCharPlus[]; ENDLOOP; <> IF char = 'L OR char = 'l THEN AddCharPlus[]; RETURN; }; '$ => { FirstChar[tokenATOM]; SELECT char FROM IN ['a..'z], IN ['A..'Z] => {}; ENDCASE => {token.msg _ "invalid atom"; GO TO Error}; DO -- accumulate rest of atom name SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {}; ENDCASE => EXIT; AddCharPlus[] ENDLOOP; RETURN}; '- => { <> FirstChar[tokenSINGLE]; IF char # '- THEN RETURN; token.kind _ tokenCOMMENT; lag _ CR; <> DO AddCharPlus[]; SELECT char FROM CR, ControlCR => GO TO LastOne; '- => IF lag = '- THEN GO TO LastOne; NUL => IF lag = NUL THEN {token.next _ index - 2; RETURN}; ENDCASE; lag _ char; ENDLOOP; }; '. => { <> FirstChar[tokenSINGLE]; IF char = '. THEN GO TO Double; IF char NOT IN ['0..'9] THEN RETURN; token.kind _ tokenREAL; AddCharPlus[]; WHILE char IN ['0..'9] DO AddCharPlus[]; ENDLOOP; IF char = 'E OR char = 'e THEN { <> AddCharPlus[]; IF char = '- OR char = '+ THEN AddCharPlus[]; WHILE char IN ['0..'9] DO AddCharPlus[]; ENDLOOP}; RETURN; }; '= => { <'>> FirstChar[tokenSINGLE]; IF char = '> THEN GO TO Double ELSE RETURN; }; '>, '< => { <' or '>=' (or '<' or '<=')>> FirstChar[tokenSINGLE]; IF char = '= THEN GO TO Double ELSE RETURN; }; ENDCASE => { FirstChar[tokenERROR]; token.msg _ "invalid character"; RETURN; }; EXITS Error => { token.kind _ tokenERROR; RETURN}; Double => { token.kind _ tokenDOUBLE; token.next _ index; RETURN}; LastOne => { token.next _ index; RETURN}; EndOfFile => { token.start _ token.next _ index - 2; token.kind _ tokenEOF; RETURN}; }; ENDLOOP; }; RealFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [REAL] = TRUSTED { <> <> index: INT _ token.start; gp: PROC RETURNS [c: CHAR] = TRUSTED { c _ get.proc[get.data, index]; index _ index + 1; }; IF token.kind # tokenREAL THEN ERROR WrongKind; RETURN [Real.ReadReal[gp]]; }; IntFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [x: INT _ 0] = { <> <> base: NAT _ 10; lc: CHAR _ 0C; end: INT _ token.next - 1; over: INT _ LastIntDiv10; IF token.kind # tokenINT THEN ERROR WrongKind; lc _ get.proc[get.data, end]; SELECT lc FROM 'H, 'h, 'X, 'x => { FOR i: INT IN [token.start..end-1] DO c: CHAR _ get.proc[get.data, i]; SELECT c FROM IN ['0..'9] => x _ x * 16 + (get.proc[get.data, i] - '0); IN ['A..'F] => x _ x * 16 + (get.proc[get.data, i] - 'A) + 10; IN ['a..'f] => x _ x * 16 + (get.proc[get.data, i] - 'a) + 10; ENDCASE; ENDLOOP; RETURN}; 'B, 'b => { base _ 8; end _ end - 1; over _ LastCardDiv8}; ENDCASE; FOR i: INT IN [token.start..end] DO IF x > over THEN ERROR IntegerOverflow; x _ x * base + (get.proc[get.data, i] - '0); IF x < 0 AND base = 10 THEN ERROR IntegerOverflow; ENDLOOP; }; CharFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [c: CHAR] = { <> <> x: CARDINAL _ 0; IF token.kind # tokenCHAR THEN ERROR WrongKind; IF get.proc[get.data, token.start] = '' THEN IF (c _ get.proc[get.data, token.start+1]) = '\\ THEN RETURN [ParseEscapeCode[get, token.start+2].c] ELSE RETURN; FOR i: INT IN [token.start..token.next - 2] DO tc: CHAR _ get.proc[get.data, i]; x _ x * 8 + (tc - '0); ENDLOOP; c _ x + 0C; }; SingleFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [c: CHAR] = { <> <> IF token.kind # tokenSINGLE THEN ERROR WrongKind; c _ get.proc[get.data, token.start]; }; RopeFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [new: ROPE] = { <> <> index: INT _ token.start + 1; end: INT _ token.next - 1; escaped: BOOL _ FALSE; res: INT _ 0; gp: PROC RETURNS [c: CHAR] = { IF index > end THEN RETURN [0C]; c _ get.proc[get.data, index]; index _ index + 1; res _ res + 1; SELECT c FROM '\\ => {[c, index] _ ParseEscapeCode[get, index]; escaped _ TRUE}; '" => {index _ index + 1; escaped _ TRUE}; ENDCASE; }; IF token.kind # tokenROPE THEN ERROR WrongKind; SELECT get.proc[get.data, end] FROM 'l, 'L => end _ end - 2; '" => end _ end - 1; ENDCASE; new _ Rope.FromProc[end - index + 1, gp]; IF escaped THEN new _ new.Substr[0, res]; }; AtomFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [ATOM] = { <> <> IF token.kind # tokenATOM THEN ERROR WrongKind; RETURN[Atom.MakeAtom[ExtractRope[get, token.start+1, token.next]]]; }; ContentsFromToken: PUBLIC PROC [get: GetClosure, token: Token] RETURNS [ROPE] = { <> <> RETURN [ExtractRope[get, token.start, token.next]]; }; WrongKind: PUBLIC ERROR = CODE; IntegerOverflow: PUBLIC ERROR = CODE; <> ExtractRope: PROC [get: GetClosure, start,end: INT] RETURNS [ROPE] = { size: INT _ end - start; gp: PROC RETURNS [c: CHAR] = { c _ get.proc[get.data, start]; start _ start + 1}; RETURN[Rope.FromProc[size, gp]]; }; ParseEscapeCode: PROC [get: GetClosure, index: INT] RETURNS [c: CHAR, next: INT] = { c _ get.proc[get.data, index]; next _ index + 1; SELECT c FROM 'n, 'N => c _ Ascii.CR; 'r, 'R => c _ Ascii.CR; 't, 'T => c _ Ascii.TAB; 'b, 'B => c _ Ascii.BS; 'f, 'F => c _ Ascii.FF; 'l, 'L => c _ Ascii.LF; IN ['0..'3] => { cc: CHAR; c _ 0C + (c-'0); cc _ get.proc[get.data, next]; IF cc IN ['0..'7] THEN { c _ 0C + ((cc-'0) + (c-0C)*10B); cc _ get.proc[get.data, next _ next + 1]; IF cc IN ['0..'7] THEN { c _ 0C + ((cc-'0) + (c-0C)*10B); next _ next + 1}}}; ENDCASE; }; <> TestResults: TYPE = RECORD [token: Token, contents: ROPE, literal: REF]; get: GetProc = { rope: ROPE = NARROW[data]; IF index < rope.Size[] THEN RETURN [rope.Fetch[index]]; RETURN [NUL]; }; getClosure: GetClosure _ [get]; tget: GetProc = { c: CHAR _ get[getClosure.data, index]; IF c = '& THEN c _ 'A; RETURN [c]; }; tgetClosure: GetClosure _ [tget]; Test: PROC [rope: ROPE] RETURNS [list: LIST OF TestResults] = { index: INT _ 0; lag, temp: LIST OF TestResults _ NIL; getClosure.data _ rope; tgetClosure.data _ rope; list _ NIL; DO token: Token _ GetToken[tgetClosure, index]; literal: REF _ NIL; IF token.kind = tokenEOF THEN EXIT; index _ token.next; SELECT token.kind FROM tokenINT => literal _ NEW[INT _ IntFromToken[getClosure, token]]; tokenREAL => literal _ NEW[REAL _ RealFromToken[getClosure, token]]; tokenCHAR => literal _ NEW[CHAR _ CharFromToken[getClosure, token]]; tokenROPE => literal _ RopeFromToken[getClosure, token]; tokenATOM => literal _ AtomFromToken[getClosure, token]; ENDCASE; temp _ LIST[[token, ContentsFromToken[getClosure, token], literal]]; IF lag = NIL THEN list _ temp ELSE lag.rest _ temp; lag _ temp; ENDLOOP; }; END.