JaMScannerImpl.mesa
Copyright © 1985, 1992 by Xerox Corporation. All rights reserved.
Original version by John Warnock, March 7, 1979
Paxton, 22-Jan-82 10:19:46
Maureen Stone February 14, 1984 4:04:32 pm PST
Doug Wyatt, March 18, 1985 3:30:36 pm PST
Last tweaked by Mike Spreitzer on January 8, 1990 4:33:49 pm PST
DIRECTORY Atom, Convert, TDJaMScanner, Rope, RopeReader;
TDJaMScannerImpl: CEDAR PROGRAM
IMPORTS Atom, Convert, Rope, RopeReader
EXPORTS TDJaMScanner
= BEGIN OPEN TDJaMScanner;
-- Classification of a character for token scanning:
--   nil  = unclassified; GetToken lets such a character continue a name or number
--   toss = delimiter; skipped between tokens, and ends a name or number
--   keep = special character (braces, parentheses, double quote, percent);
--          ends a name or number and is then scanned in its own right
Class: TYPE = {nil, toss, keep};
-- Reference to a table giving the Class of every CHAR.
ClassArray: TYPE = REF ClassArrayRep;
ClassArrayRep: TYPE = PACKED ARRAY CHAR OF Class;
InitClassArray: PROC RETURNS[ClassArray] = {
  -- Allocates the character classification table. Every character starts out
  -- as nil; the delimiters and the special characters are then marked.
  table: ClassArray = NEW[ClassArrayRep ¬ ALL[nil]];
  -- delimiters: NUL, carriage return, line feed, tab, space, comma
  table[0C] ¬ toss;
  table['\r] ¬ toss;
  table['\l] ¬ toss;
  table['\t] ¬ toss;
  table[' ] ¬ toss;
  table[',] ¬ toss;
  -- special characters: braces, parentheses, double quote, percent
  table['{] ¬ keep;
  table['}] ¬ keep;
  table['(] ¬ keep;
  table[')] ¬ keep;
  table['"] ¬ keep;
  table['%] ¬ keep;
  RETURN[table];
  };
-- Module-level classification table consulted by GetToken.
class: ClassArray = InitClassArray[];
-- States of the GetToken scanner. The state records what has been seen so far
-- in the current token and therefore how the next character is interpreted.
ScanState: TYPE = {
null, -- nothing but delimiters so far
pstring, -- inside a parenthesized string literal
qstring, -- inside a quoted string literal
esc1, -- inside quoted string, immediately after \
esc2, -- inside quoted string, after \ and one digit
esc3, -- inside quoted string, after \ and two digits
name, -- scanning a name
plus, -- after a single +
minus, -- after a single -
dot, -- after . or +. or -.
int, -- after a valid integer
oct, -- after a valid octal number (integer followed by B or b)
frac, -- after a valid real with fraction part
exp1, -- scanning a real, after E
exp2, -- scanning a real, after E+ or E-
exp3, -- after a valid real with exponent
comment -- skipping over a comment
};
GetToken: PUBLIC PROC[reader: RopeReader.Ref] RETURNS[Token] = {
  -- Scans one token starting at the reader's current index by running the
  -- ScanState machine one character at a time.
  -- On return token.start and token.len delimit the token text (leading
  -- delimiters are excluded by advancing token.start). token.type is left as in
  -- nullToken when the input holds nothing but delimiters or a double NUL is hit.
  -- A string cut off by the end of the input is returned with token.truncated set;
  -- a quoted string containing a backslash is returned with token.escaped set.
  token: Token ¬ nullToken;
  state: ScanState ¬ null;
  pnest: INT ¬ 0; -- nesting depth of parens
  end: BOOL ¬ FALSE; -- TRUE once the reader has run off the end of the rope
  token.start ¬ reader.GetIndex[];
  DO char: CHAR;
    -- At the end of the rope: stop if no token has begun, otherwise feed a
    -- synthetic newline through the state machine so the pending token is closed.
    char ¬ reader.Get[! RopeReader.ReadOffEnd =>
      IF state=null THEN EXIT
      ELSE { end ¬ TRUE; char ¬ '\n; CONTINUE }];
    IF char=0C THEN {
      -- Two NULs in a row mark the end of the unformatted part of the file.
      -- NOTE(review): assumes Peek signals ReadOffEnd at the end of the rope, as
      -- Get does; the guard keeps a lone trailing NUL from raising an unhandled
      -- error, and that NUL is then scanned like any other character.
      next: CHAR ¬ 1C; -- any non-NUL value
      next ¬ reader.Peek[! RopeReader.ReadOffEnd => CONTINUE];
      IF next=0C THEN EXIT;
      };
    { SELECT state FROM
        null => IF class[char]=toss THEN token.start ¬ token.start+1 -- skip delimiters
          ELSE SELECT char FROM
            '{ => { token.type ¬ lbrace; EXIT };
            '} => { token.type ¬ rbrace; EXIT };
            '( => { state ¬ pstring; pnest ¬ 0 }; -- begin parenthesized string
            ') => { token.type ¬ name; EXIT }; -- treat unmatched ) as a name
            '" => { state ¬ qstring }; -- begin quoted string
            '+ => { state ¬ plus }; -- might begin number or name
            '- => { state ¬ minus }; -- might begin number or name
            '. => { state ¬ dot }; -- might begin real or name
            '% => { state ¬ comment }; -- begin comment
            IN['0..'9] => { state ¬ int }; -- begin integer
            ENDCASE => { state ¬ name }; -- begin name
        pstring => SELECT char FROM
          '( => pnest ¬ pnest+1; -- open
          ') => IF pnest>0 THEN pnest ¬ pnest-1 ELSE { token.type ¬ string; EXIT }; -- close
          ENDCASE => GOTO ExtendString; -- extend string
        qstring => SELECT char FROM
          '" => { token.type ¬ string; EXIT }; -- closing quote
          '\\ => { token.escaped ¬ TRUE; state ¬ esc1 }; -- begin escape sequence
          ENDCASE => GOTO ExtendString; -- extend string
        esc1 => SELECT char FROM
          IN['0..'9] => { state ¬ esc2 }; -- 1st of up to three digits
          ENDCASE => { state ¬ qstring; GOTO ExtendString }; -- escaped character
        -- A numeric escape may have fewer than three digits. The character that
        -- follows a short escape is ordinary string text, so a quote must still
        -- close the string and a backslash must still begin a new escape.
        esc2 => SELECT char FROM
          IN['0..'9] => { state ¬ esc3 }; -- 2nd digit
          '" => { token.type ¬ string; EXIT }; -- closing quote after one digit
          '\\ => { state ¬ esc1 }; -- a new escape follows directly
          ENDCASE => { state ¬ qstring; GOTO ExtendString }; -- other
        esc3 => SELECT char FROM
          IN['0..'9] => { state ¬ qstring }; -- 3rd digit
          '" => { token.type ¬ string; EXIT }; -- closing quote after two digits
          '\\ => { state ¬ esc1 }; -- a new escape follows directly
          ENDCASE => { state ¬ qstring; GOTO ExtendString }; -- other
        name => GOTO TestForEnd; -- test for end of name
        plus => SELECT char FROM
          IN['0..'9] => { state ¬ int }; -- first integer digit
          '. => { state ¬ dot }; -- might start a real
          ENDCASE => GOTO TestForEnd; -- make it a name
        minus => SELECT char FROM
          IN['0..'9] => { state ¬ int }; -- first integer digit
          '. => { state ¬ dot }; -- might start a real
          ENDCASE => GOTO TestForEnd; -- make it a name
        dot => SELECT char FROM
          IN['0..'9] => { state ¬ frac }; -- first fraction digit
          ENDCASE => GOTO TestForEnd; -- no digits after dot
        int => SELECT char FROM
          IN['0..'9] => { }; -- extend integer
          '. => { state ¬ frac }; -- fraction coming
          'B, 'b => { state ¬ oct }; -- octal number
          'E, 'e => { state ¬ exp1 }; -- exponent coming
          ENDCASE => GOTO TestForEnd; -- integer ends here
        oct => GOTO TestForEnd; -- octal number ends here
        frac => SELECT char FROM
          IN['0..'9] => { }; -- extend fraction
          'E, 'e => { state ¬ exp1 }; -- exponent coming
          ENDCASE => GOTO TestForEnd; -- real with fraction ends here
        exp1 => SELECT char FROM
          '+, '- => { state ¬ exp2 }; -- exponent sign
          IN['0..'9] => { state ¬ exp3 }; -- first exponent digit
          ENDCASE => GOTO TestForEnd; -- make it a name
        exp2 => SELECT char FROM
          IN['0..'9] => { state ¬ exp3 }; -- first exponent digit
          ENDCASE => GOTO TestForEnd; -- make it a name
        exp3 => SELECT char FROM
          IN['0..'9] => { }; -- extend exponent
          ENDCASE => GOTO TestForEnd; -- real with exponent ends here
        comment => SELECT char FROM
          '\n => { token.type ¬ comment; EXIT }; -- end of comment
          ENDCASE => { }; -- skip
        ENDCASE => ERROR; -- unknown state
      EXITS
        ExtendString =>
          -- the input ended inside a string: return what there is, marked truncated
          IF end THEN { token.truncated ¬ TRUE; token.type ¬ string; EXIT };
        TestForEnd =>
          IF class[char]=nil THEN state ¬ name -- if it doesn't end here, make it a name
          ELSE {
            -- only states that denote a complete number yield a numeric token
            token.type ¬ SELECT state FROM
              int, oct => int, frac, exp3 => real, ENDCASE => name;
            IF NOT end THEN [] ¬ reader.Backwards[]; -- put the last character back
            EXIT;
            };
      };
    ENDLOOP;
  token.len ¬ reader.GetIndex[]-token.start;
  RETURN[token];
  };
ParseToken: PUBLIC PROC[token: Token, rope: ROPE] RETURNS[Any] = {
  -- Converts the text of a scanned token, located in rope by token.start and
  -- token.len, into a value: integer, real, atom, or string contents.
  -- Any other token type raises ERROR.
  SELECT token.type FROM
    string => {
      -- drop the opening delimiter, and the closing one unless it is missing
      delims: INT = IF token.truncated THEN 1 ELSE 2;
      inner: ROPE = rope.Substr[token.start+1, token.len-delims];
      IF token.escaped THEN RETURN[RemoveEscapes[inner]] ELSE RETURN[inner];
      };
    name => RETURN[ParseAtom[rope, token.start, token.len]];
    real => RETURN[ParseReal[rope, token.start, token.len]];
    int => RETURN[ParseInt[rope, token.start, token.len]];
    ENDCASE => ERROR; -- unexpected token type
  };
ParseInt: PROC[rope: ROPE, start, len: INT] RETURNS[x: Any] = {
  -- Converts the len characters of rope beginning at start into a new INT.
  digits: ROPE = rope.Substr[start, len];
  value: INT = Convert.IntFromRope[digits];
  RETURN[NEW[INT ¬ value]];
  };
ParseReal: PROC[rope: ROPE, start, len: INT] RETURNS[Any] = {
  -- Converts the len characters of rope beginning at start into a new REAL.
  -- If Convert reports a syntax error, the conversion is retried once with a
  -- "0" appended to the text.
  text: ROPE = rope.Substr[start, len];
  {
    ENABLE Convert.Error => IF reason=syntax THEN GOTO Hack;
    RETURN[NEW[REAL ¬ Convert.RealFromRope[text]]];
    EXITS Hack => --only until Convert is fixed
      RETURN[NEW[REAL ¬ Convert.RealFromRope[Rope.Concat[text, "0"]]]];
    };
  };
ParseAtom: PROC[rope: ROPE, start, len: INT] RETURNS[Any] = {
  -- Makes an atom from the len characters of rope beginning at start.
  pname: ROPE = rope.Substr[start, len];
  RETURN[Atom.MakeAtom[pname]];
  };
RemoveEscapes: PROC[text: ROPE] RETURNS[ROPE] = {
  -- Returns text with each backslash escape replaced by the character it denotes.
  -- A backslash followed by a letter from the table below gives a control
  -- character, a backslash followed by one to three octal digits gives the
  -- character with that code, and any other escaped character stands for itself.
  size: INT = text.Length[];
  next: INT ¬ 0; -- index of the next unread character of text
  produced: INT ¬ 0; -- number of characters generated so far
  Next: PROC RETURNS[CHAR] = {
    -- consume and return the next source character, or NUL when exhausted
    c: CHAR;
    IF next>=size THEN RETURN[0C];
    c ¬ text.Fetch[next];
    next ¬ next+1;
    RETURN[c];
    };
  Look: PROC RETURNS[CHAR] = {
    -- return the next source character without consuming it, or NUL when exhausted
    RETURN[IF next<size THEN text.Fetch[next] ELSE 0C];
    };
  Emit: PROC RETURNS[char: CHAR] = {
    -- generate the next output character; NUL padding once the source is used up
    IF next>=size THEN RETURN[0C];
    char ¬ Next[];
    IF char = '\\ THEN {
      char ¬ Next[];
      SELECT char FROM
        'n, 'N, 'r, 'R => char ¬ '\n;
        't, 'T => char ¬ '\t;
        'b, 'B => char ¬ '\b;
        'f, 'F => char ¬ '\f;
        'l, 'L => char ¬ '\l;
        IN['0..'7] => {
          -- accumulate up to three octal digits
          code: CARDINAL ¬ char-'0;
          IF Look[] IN['0..'7] THEN {
            code ¬ code*8 + Next[]-'0;
            IF Look[] IN['0..'7] THEN code ¬ code*8 + Next[]-'0;
            };
          char ¬ LOOPHOLE[code];
          };
        ENDCASE; -- any other character stands for itself
      };
    produced ¬ produced+1;
    };
  -- Emit is called size times; only the first produced characters are real output.
  padded: ROPE = Rope.FromProc[size, Emit];
  RETURN[padded.Substr[0, produced]];
  };
END.