-- TDJaMScannerImpl
-- Token scanner exported through the TDJaMScanner interface.
-- GetToken reads characters from a RopeReader and classifies the next token
-- (brace, string, name, int, real or comment); ParseToken turns a scanned
-- token into a value.
-- NOTE(review): the original header comment lines were lost when this file
-- was extracted (only empty angle brackets survived); restore them from the
-- repository copy if one is available.

DIRECTORY
  Atom, Convert, TDJaMScanner, Rope, RopeReader;

TDJaMScannerImpl: CEDAR PROGRAM
IMPORTS Atom, Convert, Rope, RopeReader
EXPORTS TDJaMScanner
= BEGIN OPEN TDJaMScanner;

-- Character classes used by the scanner:
--   nil: ordinary character, may be part of a name or number
--   toss: delimiter that is skipped between tokens
--   keep: delimiter that ends a name or number and is itself significant
Class: TYPE = {nil, toss, keep};
ClassArray: TYPE = REF ClassArrayRep;
ClassArrayRep: TYPE = PACKED ARRAY CHAR OF Class;

-- Builds the character classification table: everything starts as nil, then
-- the skipped delimiters and the significant delimiters are marked.
InitClassArray: PROC RETURNS[ClassArray] = {
  class: ClassArray = NEW[ClassArrayRep _ ALL[nil]];
  class[0C] _ class['\r] _ class['\l] _ class['\t] _ class[' ] _ class[',] _ toss;
  class['{] _ class['}] _ class['(] _ class[')] _ class['"] _ class['%] _ keep;
  RETURN[class];
  };

class: ClassArray = InitClassArray[];

ScanState: TYPE = {
  null, -- nothing but delimiters so far
  pstring, -- inside a parenthesized string literal
  qstring, -- inside a quoted string literal
  esc1, -- inside quoted string, after \
  esc2, -- inside quoted string, 2nd char after \
  esc3, -- inside quoted string, 3rd char after \
  name, -- scanning a name
  plus, -- after a single +
  minus, -- after a single -
  dot, -- after . or +. or -.
  int, -- after a valid integer
  oct, -- after a valid octal number
  frac, -- after a valid real with fraction part
  exp1, -- scanning a real, after E
  exp2, -- scanning a real, after E+ or E-
  exp3, -- after a valid real with exponent
  comment -- skipping over a comment
  };

-- Scans the next token from reader, starting at the reader's current index.
-- Returns a Token whose start and len delimit the token text and whose type
-- is set from the final scanner state. If the input runs out inside a string,
-- the token is marked truncated; if a quoted string contains a backslash
-- escape, the token is marked escaped. If only delimiters remain, or two
-- consecutive 0C characters are seen, the token is returned with whatever
-- type it had at that point (initially that of nullToken).
GetToken: PUBLIC PROC[reader: RopeReader.Ref] RETURNS[Token] = {
  token: Token _ nullToken;
  state: ScanState _ null;
  pnest: INT _ 0; -- nesting depth of parens
  end: BOOL _ FALSE;
  token.start _ reader.GetIndex[];
  DO
    char: CHAR;
    -- At end of input: stop if no token has begun, otherwise feed a
    -- synthetic newline so the current token is terminated.
    char _ reader.Get[! RopeReader.ReadOffEnd =>
      IF state=null THEN EXIT ELSE { end _ TRUE; char _ '\n; CONTINUE }];
    -- NOTE(review): if the final character of the input is 0C, this Peek is
    -- made at end of input; confirm that RopeReader.Peek tolerates that.
    IF char=0C AND reader.Peek[]=0C THEN EXIT; --end of unformatted part of file
    {
    SELECT state FROM
      null => IF class[char]=toss THEN token.start _ token.start+1 -- skip delimiters
        ELSE SELECT char FROM
          '{ => { token.type _ lbrace; EXIT };
          '} => { token.type _ rbrace; EXIT };
          '( => { state _ pstring; pnest _ 0 }; -- begin parenthesized string
          ') => { token.type _ name; EXIT }; -- treat unmatched ) as a name
          '" => { state _ qstring }; -- begin quoted string
          '+ => { state _ plus }; -- might begin number or name
          '- => { state _ minus }; -- might begin number or name
          '. => { state _ dot }; -- might begin real or name
          '% => { state _ comment }; -- begin comment
          IN['0..'9] => { state _ int }; -- begin integer
          ENDCASE => { state _ name }; -- begin name
      pstring => SELECT char FROM
        '( => pnest _ pnest+1; -- open
        ') => IF pnest>0 THEN pnest _ pnest-1
          ELSE { token.type _ string; EXIT }; -- close
        ENDCASE => GOTO ExtendString; -- extend string
      qstring => SELECT char FROM
        '" => { token.type _ string; EXIT }; -- closing quote
        '\\ => { token.escaped _ TRUE; state _ esc1 }; -- begin escape sequence
        ENDCASE => GOTO ExtendString; -- extend string
      esc1 => SELECT char FROM
        IN['0..'9] => { state _ esc2 }; -- 1st of up to three digits
        ENDCASE => { state _ qstring; GOTO ExtendString }; -- other
      -- In esc2 and esc3 a non-digit ends the numeric escape and is an
      -- ordinary string character, so a closing quote or a backslash must be
      -- treated exactly as in qstring. Previously such a quote was swallowed
      -- and the string ran on past its real end.
      esc2 => SELECT char FROM
        IN['0..'9] => { state _ esc3 }; -- 2nd digit
        '" => { token.type _ string; EXIT }; -- closing quote right after escape
        '\\ => { state _ esc1 }; -- another escape follows immediately
        ENDCASE => { state _ qstring; GOTO ExtendString }; -- other
      esc3 => SELECT char FROM
        IN['0..'9] => { state _ qstring }; -- 3rd digit
        '" => { token.type _ string; EXIT }; -- closing quote right after escape
        '\\ => { state _ esc1 }; -- another escape follows immediately
        ENDCASE => { state _ qstring; GOTO ExtendString }; -- other
      name => GOTO TestForEnd; -- test for end of name
      plus => SELECT char FROM
        IN['0..'9] => { state _ int }; -- first integer digit
        '. => { state _ dot }; -- might start a real
        ENDCASE => GOTO TestForEnd; -- make it a name
      minus => SELECT char FROM
        IN['0..'9] => { state _ int }; -- first integer digit
        '. => { state _ dot }; -- might start a real
        ENDCASE => GOTO TestForEnd; -- make it a name
      dot => SELECT char FROM
        IN['0..'9] => { state _ frac }; -- first fraction digit
        ENDCASE => GOTO TestForEnd; -- no digits after dot
      int => SELECT char FROM
        IN['0..'9] => { }; -- extend integer
        '. => { state _ frac }; -- fraction coming
        'B, 'b => { state _ oct }; -- octal number
        'E, 'e => { state _ exp1 }; -- exponent coming
        ENDCASE => GOTO TestForEnd; -- integer ends here
      oct => GOTO TestForEnd; -- octal number ends here
      frac => SELECT char FROM
        IN['0..'9] => { }; -- extend fraction
        'E, 'e => { state _ exp1 }; -- exponent coming
        ENDCASE => GOTO TestForEnd; -- real with fraction ends here
      exp1 => SELECT char FROM
        '+, '- => { state _ exp2 }; -- exponent sign
        IN['0..'9] => { state _ exp3 }; -- first exponent digit
        ENDCASE => GOTO TestForEnd; -- make it a name
      exp2 => SELECT char FROM
        IN['0..'9] => { state _ exp3 }; -- first exponent digit
        ENDCASE => GOTO TestForEnd; -- make it a name
      exp3 => SELECT char FROM
        IN['0..'9] => { }; -- extend exponent
        ENDCASE => GOTO TestForEnd; -- real with exponent ends here
      comment => SELECT char FROM
        '\n => { token.type _ comment; EXIT }; -- end of comment
        ENDCASE => { }; -- skip
      ENDCASE => ERROR; -- unknown state
    EXITS
      -- A string character was consumed; if it was really the synthetic
      -- end-of-input newline, the string is unterminated.
      ExtendString => IF end THEN {
        token.truncated _ TRUE; token.type _ string; EXIT };
      TestForEnd => IF class[char]=nil THEN state _ name -- if it doesn't end here, make it a name
        ELSE {
          token.type _ SELECT state FROM
            int, oct => int,
            frac, exp3 => real,
            ENDCASE => name;
          IF NOT end THEN [] _ reader.Backwards[]; -- put the last character back
          EXIT;
          };
    };
    ENDLOOP;
  token.len _ reader.GetIndex[]-token.start;
  RETURN[token];
  };

-- Converts a scanned token into a value, reading its text from rope at
-- [token.start, token.start+token.len). Handles int, real, name and string
-- tokens; any other token type raises ERROR.
ParseToken: PUBLIC PROC[token: Token, rope: ROPE] RETURNS[Any] = {
  SELECT token.type FROM
    int => RETURN[ParseInt[rope, token.start, token.len]];
    real => RETURN[ParseReal[rope, token.start, token.len]];
    name => RETURN[ParseAtom[rope, token.start, token.len]];
    string => {
      -- Drop the opening bracket, and the closing one unless the string was
      -- truncated by end of input (in which case there is none).
      s: ROPE = rope.Substr[token.start+1,
        token.len-(IF token.truncated THEN 1 ELSE 2)];
      RETURN[IF token.escaped THEN RemoveEscapes[s] ELSE s];
      };
    ENDCASE => ERROR; -- unexpected token type
  };

-- Returns a new REF INT holding the integer denoted by the given substring.
ParseInt: PROC[rope: ROPE, start, len: INT] RETURNS[x: Any] = {
  RETURN[NEW[INT _ Convert.IntFromRope[rope.Substr[start, len]]]];
  };

-- Returns a new REF REAL holding the real denoted by the given substring.
-- If Convert reports a syntax error, the conversion is retried with "0"
-- appended to the text.
ParseReal: PROC[rope: ROPE, start, len: INT] RETURNS[Any] = {
  {ENABLE Convert.Error => IF reason=syntax THEN GOTO Hack;
  RETURN[NEW[REAL _ Convert.RealFromRope[rope.Substr[start, len]]]];
  EXITS Hack => --only until Convert is fixed
    RETURN[NEW[REAL _ Convert.RealFromRope[Rope.Concat[rope.Substr[start, len],"0"]]]];
  };
  };

-- Returns the atom whose print name is the given substring.
ParseAtom: PROC[rope: ROPE, start, len: INT] RETURNS[Any] = {
  RETURN[Atom.MakeAtom[rope.Substr[start, len]]];
  };

-- Returns text with backslash escapes replaced by the characters they denote:
-- letter escapes for control characters, one to three octal digits for a
-- character code, and any other escaped character standing for itself.
-- NOTE(review): the source text between "IF read" (in Get) and the first
-- "char _ '\n" arm of the SELECT was lost in extraction. Get, Peek, the head
-- of Put and the first SELECT label below are reconstructed from their
-- surviving uses; verify them against the repository copy. In particular the
-- original label for the newline arm may also have listed 'r and 'R.
RemoveEscapes: PROC[text: ROPE] RETURNS[ROPE] = {
  len: INT = text.Length[];
  read, write: INT _ 0; -- characters consumed from text, characters produced
  -- Consumes and returns the next input character, or 0C when exhausted.
  Get: PROC RETURNS[ch: CHAR] = {
    IF read<len THEN { ch _ text.Fetch[read]; read _ read+1 }
    ELSE ch _ 0C;
    };
  -- Returns the next input character without consuming it, or 0C when exhausted.
  Peek: PROC RETURNS[ch: CHAR] = {
    IF read<len THEN ch _ text.Fetch[read] ELSE ch _ 0C;
    };
  -- Produces the next output character; once the input is exhausted it
  -- returns 0C padding without advancing write.
  Put: PROC RETURNS[char: CHAR] = {
    IF read<len THEN {
      char _ Get[];
      IF char='\\ THEN {
        char _ Get[];
        SELECT char FROM
          'n, 'N => char _ '\n;
          't, 'T => char _ '\t;
          'b, 'B => char _ '\b;
          'f, 'F => char _ '\f;
          'l, 'L => char _ '\l;
          IN['0..'7] => {
            d: CARDINAL _ char-'0;
            IF Peek[] IN['0..'7] THEN {
              d _ d*8 + Get[]-'0;
              IF Peek[] IN['0..'7] THEN d _ d*8 + Get[]-'0 };
            char _ LOOPHOLE[d] };
          ENDCASE; -- any other character stands for itself
        };
      write _ write+1;
      }
    ELSE char _ 0C;
    };
  -- FromProc is asked for len characters because the output cannot be longer
  -- than the input; the result is then trimmed to the count actually produced.
  result: ROPE = Rope.FromProc[len, Put];
  RETURN[result.Substr[0, write]];
  };

END.