-- NOTE(review): the original file header comments appear to have been lost in extraction
-- (only empty bracket residue remained); restore them from the upstream source.

DIRECTORY
  Atom, Convert, TDJaMScanner, Rope, RopeReader;

TDJaMScannerImpl: CEDAR PROGRAM
IMPORTS Atom, Convert, Rope, RopeReader
EXPORTS TDJaMScanner
= BEGIN OPEN TDJaMScanner;

-- Character classes used to decide where a name or number token ends.
--   nil:  ordinary character, may be part of a name or number
--   toss: delimiter that is skipped between tokens
--   keep: delimiter that is itself the start of (or all of) a token
Class: TYPE = {nil, toss, keep};
ClassArray: TYPE = REF ClassArrayRep;
ClassArrayRep: TYPE = PACKED ARRAY CHAR OF Class;

-- Builds the character classification table consulted by GetToken.
InitClassArray: PROC RETURNS[ClassArray] = {
  class: ClassArray = NEW[ClassArrayRep ¬ ALL[nil]];
  class[0C] ¬ class['\r] ¬ class['\l] ¬ class['\t] ¬ class[' ] ¬ class[',] ¬ toss;
  class['{] ¬ class['}] ¬ class['(] ¬ class[')] ¬ class['"] ¬ class['%] ¬ keep;
  RETURN[class];
  };

class: ClassArray = InitClassArray[];

ScanState: TYPE = {
  null, -- nothing but delimiters so far
  pstring, -- inside a parenthesized string literal
  qstring, -- inside a quoted string literal
  esc1, -- inside quoted string, after \
  esc2, -- inside quoted string, 2nd char after \
  esc3, -- inside quoted string, 3rd char after \
  name, -- scanning a name
  plus, -- after a single +
  minus, -- after a single -
  dot, -- after . or +. or -.
  int, -- after a valid integer
  oct, -- after a valid octal number
  frac, -- after a valid real with fraction part
  exp1, -- scanning a real, after E
  exp2, -- scanning a real, after E+ or E-
  exp3, -- after a valid real with exponent
  comment -- skipping over a comment
  };

-- Scans the next token from reader and returns its type, start index and length.
-- Leading delimiters are skipped. A token whose type is still that of nullToken
-- means that the end of the text was reached before any token began.
-- The end of the text is either the end of the rope (RopeReader.ReadOffEnd) or a
-- pair of 0C characters. In both cases a token that is already in progress is
-- completed as if a newline had been read; an unterminated string is returned
-- with token.truncated set.
GetToken: PUBLIC PROC[reader: RopeReader.Ref] RETURNS[Token] = {
  token: Token ¬ nullToken;
  state: ScanState ¬ null;
  pnest: INT ¬ 0; -- nesting depth of parens
  end: BOOL ¬ FALSE; -- TRUE once the end of the text has been seen
  token.start ¬ reader.GetIndex[];
  DO
    char: CHAR;
    char ¬ reader.Get[! RopeReader.ReadOffEnd =>
      IF state=null THEN EXIT ELSE { end ¬ TRUE; char ¬ '\n; CONTINUE }];
    IF char=0C AND reader.Peek[]=0C THEN {
      -- end of unformatted part of file
      IF state=null THEN EXIT;
      -- A token is pending: finish it exactly as for ReadOffEnd instead of dropping it.
      -- The 0C is put back so that it is not counted in token.len and so that the
      -- next call sees the end marker again.
      [] ¬ reader.Backwards[];
      end ¬ TRUE;
      char ¬ '\n;
      };
    {
    SELECT state FROM
      null =>
        IF class[char]=toss THEN token.start ¬ token.start+1 -- skip delimiters
        ELSE SELECT char FROM
          '{ => { token.type ¬ lbrace; EXIT };
          '} => { token.type ¬ rbrace; EXIT };
          '( => { state ¬ pstring; pnest ¬ 0 }; -- begin parenthesized string
          ') => { token.type ¬ name; EXIT }; -- treat unmatched ) as a name
          '" => { state ¬ qstring }; -- begin quoted string
          '+ => { state ¬ plus }; -- might begin number or name
          '- => { state ¬ minus }; -- might begin number or name
          '. => { state ¬ dot }; -- might begin real or name
          '% => { state ¬ comment }; -- begin comment
          IN['0..'9] => { state ¬ int }; -- begin integer
          ENDCASE => { state ¬ name }; -- begin name
      pstring => SELECT char FROM
        '( => pnest ¬ pnest+1; -- open
        ') => IF pnest>0 THEN pnest ¬ pnest-1
          ELSE { token.type ¬ string; EXIT }; -- close
        ENDCASE => GOTO ExtendString; -- extend string
      qstring => SELECT char FROM
        '" => { token.type ¬ string; EXIT }; -- closing quote
        '\\ => { token.escaped ¬ TRUE; state ¬ esc1 }; -- begin escape sequence
        ENDCASE => GOTO ExtendString; -- extend string
      esc1 => SELECT char FROM
        IN['0..'9] => { state ¬ esc2 }; -- 1st of up to three digits
        ENDCASE => { state ¬ qstring; GOTO ExtendString }; -- any other escaped character
      esc2 => SELECT char FROM
        IN['0..'9] => { state ¬ esc3 }; -- 2nd digit
        -- A non-digit ends the numeric escape and must be treated as ordinary
        -- quoted-string input, otherwise a closing quote would be swallowed.
        '" => { token.type ¬ string; EXIT }; -- closing quote
        '\\ => { state ¬ esc1 }; -- a new escape sequence begins
        ENDCASE => { state ¬ qstring; GOTO ExtendString }; -- other
      esc3 => SELECT char FROM
        IN['0..'9] => { state ¬ qstring }; -- 3rd digit
        '" => { token.type ¬ string; EXIT }; -- closing quote
        '\\ => { state ¬ esc1 }; -- a new escape sequence begins
        ENDCASE => { state ¬ qstring; GOTO ExtendString }; -- other
      name => GOTO TestForEnd; -- test for end of name
      plus => SELECT char FROM
        IN['0..'9] => { state ¬ int }; -- first integer digit
        '. => { state ¬ dot }; -- might start a real
        ENDCASE => GOTO TestForEnd; -- make it a name
      minus => SELECT char FROM
        IN['0..'9] => { state ¬ int }; -- first integer digit
        '. => { state ¬ dot }; -- might start a real
        ENDCASE => GOTO TestForEnd; -- make it a name
      dot => SELECT char FROM
        IN['0..'9] => { state ¬ frac }; -- first fraction digit
        ENDCASE => GOTO TestForEnd; -- no digits after dot
      int => SELECT char FROM
        IN['0..'9] => { }; -- extend integer
        '. => { state ¬ frac }; -- fraction coming
        'B, 'b => { state ¬ oct }; -- octal number
        'E, 'e => { state ¬ exp1 }; -- exponent coming
        ENDCASE => GOTO TestForEnd; -- integer ends here
      oct => GOTO TestForEnd; -- octal number ends here
      frac => SELECT char FROM
        IN['0..'9] => { }; -- extend fraction
        'E, 'e => { state ¬ exp1 }; -- exponent coming
        ENDCASE => GOTO TestForEnd; -- real with fraction ends here
      exp1 => SELECT char FROM
        '+, '- => { state ¬ exp2 }; -- exponent sign
        IN['0..'9] => { state ¬ exp3 }; -- first exponent digit
        ENDCASE => GOTO TestForEnd; -- make it a name
      exp2 => SELECT char FROM
        IN['0..'9] => { state ¬ exp3 }; -- first exponent digit
        ENDCASE => GOTO TestForEnd; -- make it a name
      exp3 => SELECT char FROM
        IN['0..'9] => { }; -- extend exponent
        ENDCASE => GOTO TestForEnd; -- real with exponent ends here
      comment => SELECT char FROM
        '\n => { token.type ¬ comment; EXIT }; -- end of comment
        ENDCASE => { }; -- skip
      ENDCASE => ERROR; -- unknown state
    EXITS
      ExtendString =>
        -- Ordinary string character; only the end of the text needs action here.
        IF end THEN { token.truncated ¬ TRUE; token.type ¬ string; EXIT };
      TestForEnd =>
        IF class[char]=nil THEN state ¬ name -- if it doesn't end here, make it a name
        ELSE {
          token.type ¬ SELECT state FROM
            int, oct => int,
            frac, exp3 => real,
            ENDCASE => name;
          IF NOT end THEN [] ¬ reader.Backwards[]; -- put the last character back
          EXIT;
          };
    };
    ENDLOOP;
  token.len ¬ reader.GetIndex[]-token.start;
  RETURN[token];
  };

-- Converts the text of a scanned token, located in rope by token.start and
-- token.len, into a value. Only int, real, name and string tokens are accepted;
-- any other token type raises ERROR.
ParseToken: PUBLIC PROC[token: Token, rope: ROPE] RETURNS[Any] = {
  SELECT token.type FROM
    int => RETURN[ParseInt[rope, token.start, token.len]];
    real => RETURN[ParseReal[rope, token.start, token.len]];
    name => RETURN[ParseAtom[rope, token.start, token.len]];
    string => {
      -- Drop the opening delimiter, and the closing one unless the string was truncated.
      s: ROPE = rope.Substr[token.start+1, token.len-(IF token.truncated THEN 1 ELSE 2)];
      RETURN[IF token.escaped THEN RemoveEscapes[s] ELSE s];
      };
    ENDCASE => ERROR; -- unexpected token type
  };

-- Converts the given span of rope to a new REF INT using Convert.IntFromRope.
ParseInt: PROC[rope: ROPE, start, len: INT] RETURNS[x: Any] = {
  RETURN[NEW[INT ¬ Convert.IntFromRope[rope.Substr[start, len]]]];
  };

-- Converts the given span of rope to a new REF REAL using Convert.RealFromRope.
-- If Convert reports a syntax error, the conversion is retried with "0" appended.
ParseReal: PROC[rope: ROPE, start, len: INT] RETURNS[Any] = {
  {
  ENABLE Convert.Error => IF reason=syntax THEN GOTO Hack;
  RETURN[NEW[REAL ¬ Convert.RealFromRope[rope.Substr[start, len]]]];
  EXITS Hack => -- only until Convert is fixed
    RETURN[NEW[REAL ¬ Convert.RealFromRope[Rope.Concat[rope.Substr[start, len], "0"]]]];
  };
  };

-- Converts the given span of rope to an atom using Atom.MakeAtom.
ParseAtom: PROC[rope: ROPE, start, len: INT] RETURNS[Any] = {
  RETURN[Atom.MakeAtom[rope.Substr[start, len]]];
  };

-- Returns text with backslash escape sequences replaced by the characters they denote.
-- A backslash followed by one to three octal digits gives the character with that
-- code; a backslash followed by any unlisted character gives that character.
-- NOTE(review): the span from the body of Get through the first escape arm of Put
-- was missing from the extracted source and has been reconstructed from the
-- surviving calls to Get and Peek. Confirm against the upstream file, in
-- particular the label of the first arm, assumed here to be 'n, 'N.
RemoveEscapes: PROC[text: ROPE] RETURNS[ROPE] = {
  len: INT = text.Length[];
  read, write: INT ¬ 0; -- characters consumed from text, characters produced
  -- Consumes and returns the next character of text, or 0C when none is left.
  Get: PROC RETURNS[ch: CHAR] = {
    IF read<len THEN { ch ¬ text.Fetch[read]; read ¬ read+1 } ELSE ch ¬ 0C;
    };
  -- Returns the next character of text without consuming it, or 0C when none is left.
  Peek: PROC RETURNS[ch: CHAR] = {
    ch ¬ IF read<len THEN text.Fetch[read] ELSE 0C;
    };
  -- Produces the next output character; pads with 0C once the input is used up.
  Put: PROC RETURNS[char: CHAR] = {
    IF read<len THEN {
      char ¬ Get[];
      IF char='\\ THEN {
        char ¬ Get[];
        SELECT char FROM
          'n, 'N => char ¬ '\n;
          't, 'T => char ¬ '\t;
          'b, 'B => char ¬ '\b;
          'f, 'F => char ¬ '\f;
          'l, 'L => char ¬ '\l;
          IN['0..'7] => {
            d: CARDINAL ¬ char-'0;
            IF Peek[] IN['0..'7] THEN {
              d ¬ d*8 + Get[]-'0;
              IF Peek[] IN['0..'7] THEN d ¬ d*8 + Get[]-'0;
              };
            char ¬ LOOPHOLE[d];
            };
          ENDCASE; -- any other character stands for itself
        };
      write ¬ write+1;
      }
    ELSE char ¬ 0C;
    };
  -- Put is called len times; only the first write characters are meaningful.
  result: ROPE = Rope.FromProc[len, Put];
  RETURN[result.Substr[0, write]];
  };

END.