-- JaMScannerImpl.mesa
-- Copyright © 1985 by Xerox Corporation. All rights reserved.
-- Original version by John Warnock, March 7, 1979
-- Paxton, 22-Jan-82 10:19:46
-- Maureen Stone February 14, 1984 4:04:32 pm PST
-- Doug Wyatt, March 18, 1985 3:30:36 pm PST
-- Last tweaked by Mike Spreitzer on January 8, 1990 4:33:49 pm PST
DIRECTORY Atom, Convert, TDJaMScanner, Rope, RopeReader;
TDJaMScannerImpl: CEDAR PROGRAM
IMPORTS Atom, Convert, Rope, RopeReader
EXPORTS TDJaMScanner
= BEGIN OPEN TDJaMScanner;
-- Character classes consulted by GetToken:
--   nil: ordinary character; it can extend a name or number token
--   toss: delimiter that is skipped when no token is in progress
--   keep: character that ends a name or number but is not skipped
Class: TYPE = {nil, toss, keep};
-- Reference to a per-character classification table.
ClassArray: TYPE = REF ClassArrayRep;
-- One Class entry for every CHAR value.
ClassArrayRep: TYPE = PACKED ARRAY CHAR OF Class;
InitClassArray: PROC RETURNS[ClassArray] = {
  -- Builds the character classification table: every char starts as nil,
  -- then the delimiter chars are marked toss and the punctuation chars keep.
  table: ClassArray = NEW[ClassArrayRep ← ALL[nil]];
  -- delimiters
  table[' ] ← toss;
  table[',] ← toss;
  table['\t] ← toss;
  table['\l] ← toss;
  table['\r] ← toss;
  table[0C] ← toss;
  -- punctuation
  table['{] ← keep;
  table['}] ← keep;
  table['(] ← keep;
  table[')] ← keep;
  table['"] ← keep;
  table['%] ← keep;
  RETURN[table];
  };
-- Module-wide classification table, built once by InitClassArray and indexed by CHAR in GetToken.
class: ClassArray = InitClassArray[];
-- States of the scanning loop in GetToken. The state records what kind of
-- token the characters read so far could still turn out to be.
ScanState: TYPE = {
null, -- nothing but delimiters so far
pstring, -- inside a parenthesized string literal
qstring, -- inside a quoted string literal
esc1, -- inside quoted string, after \
esc2, -- inside quoted string, 2nd char after \
esc3, -- inside quoted string, 3rd char after \
name, -- scanning a name
plus, -- after a single +
minus, -- after a single -
dot, -- after . or +. or -.
int, -- after a valid integer
oct, -- after a valid octal number
frac, -- after a valid real with fraction part
exp1, -- scanning a real, after E
exp2, -- scanning a real, after E+ or E-
exp3, -- after a valid real with exponent
comment -- skipping over a comment
};
GetToken: PUBLIC PROC[reader: RopeReader.Ref] RETURNS[Token] = {
  -- Scans the next token from reader and returns it.
  -- The result starts as nullToken. token.start is the index of the first
  -- character of the token (leading delimiters are skipped by advancing it)
  -- and token.len is the number of characters consumed from there.
  -- If the reader runs off the end while only delimiters have been seen, or
  -- two consecutive 0C characters are read, the loop exits with token.type
  -- left as it was initialized from nullToken.
  -- If the reader runs off the end inside a token, a '\n is substituted so
  -- the state machine can finish the token; an unterminated string is
  -- returned with token.truncated set.
  token: Token ← nullToken;
  state: ScanState ← null;
  pnest: INT ← 0; -- nesting depth of parens
  end: BOOL ← FALSE; -- TRUE once the reader has run off the end inside a token
  token.start ← reader.GetIndex[];
  DO char: CHAR;
    char ← reader.Get[! RopeReader.ReadOffEnd =>
      IF state=null THEN EXIT
      ELSE { end ← TRUE; char ← '\n; CONTINUE }];
    IF char=0C AND reader.Peek[]=0C THEN EXIT; --end of unformatted part of file
    { SELECT state FROM
        null => IF class[char]=toss THEN token.start ← token.start+1 -- skip delimiters
          ELSE SELECT char FROM
            '{ => { token.type ← lbrace; EXIT };
            '} => { token.type ← rbrace; EXIT };
            '( => { state ← pstring; pnest ← 0 }; -- begin parenthesized string
            ') => { token.type ← name; EXIT }; -- treat unmatched ) as a name
            '" => { state ← qstring }; -- begin quoted string
            '+ => { state ← plus }; -- might begin number or name
            '- => { state ← minus }; -- might begin number or name
            '. => { state ← dot }; -- might begin real or name
            '% => { state ← comment }; -- begin comment
            IN['0..'9] => { state ← int }; -- begin integer
            ENDCASE => { state ← name }; -- begin name
        pstring => SELECT char FROM
          '( => pnest ← pnest+1; -- open
          ') => IF pnest>0 THEN pnest ← pnest-1 ELSE { token.type ← string; EXIT }; -- close
          ENDCASE => GOTO ExtendString; -- extend string
        qstring => SELECT char FROM
          '" => { token.type ← string; EXIT }; -- closing quote
          '\\ => { token.escaped ← TRUE; state ← esc1 }; -- begin escape sequence
          ENDCASE => GOTO ExtendString; -- extend string
        esc1 => SELECT char FROM
          IN['0..'9] => { state ← esc2 }; -- 1st of up to three digits
          ENDCASE => { state ← qstring; GOTO ExtendString }; -- other
        -- In esc2 and esc3 the escape has already ended if char is not a digit,
        -- so a quote or backslash must be treated exactly as in qstring;
        -- otherwise a closing quote right after a 1 or 2 digit escape would be
        -- absorbed into the string.
        esc2 => SELECT char FROM
          IN['0..'9] => { state ← esc3 }; -- 2nd digit
          '" => { token.type ← string; EXIT }; -- closing quote after short escape
          '\\ => { state ← esc1 }; -- a new escape follows a short escape
          ENDCASE => { state ← qstring; GOTO ExtendString }; -- other
        esc3 => SELECT char FROM
          IN['0..'9] => { state ← qstring }; -- 3rd digit
          '" => { token.type ← string; EXIT }; -- closing quote after short escape
          '\\ => { state ← esc1 }; -- a new escape follows a short escape
          ENDCASE => { state ← qstring; GOTO ExtendString }; -- other
        name => GOTO TestForEnd; -- test for end of name
        plus => SELECT char FROM
          IN['0..'9] => { state ← int }; -- first integer digit
          '. => { state ← dot }; -- might start a real
          ENDCASE => GOTO TestForEnd; -- make it a name
        minus => SELECT char FROM
          IN['0..'9] => { state ← int }; -- first integer digit
          '. => { state ← dot }; -- might start a real
          ENDCASE => GOTO TestForEnd; -- make it a name
        dot => SELECT char FROM
          IN['0..'9] => { state ← frac }; -- first fraction digit
          ENDCASE => GOTO TestForEnd; -- no digits after dot
        int => SELECT char FROM
          IN['0..'9] => { }; -- extend integer
          '. => { state ← frac }; -- fraction coming
          'B, 'b => { state ← oct }; -- octal number
          'E, 'e => { state ← exp1 }; -- exponent coming
          ENDCASE => GOTO TestForEnd; -- integer ends here
        oct => GOTO TestForEnd; -- octal number ends here
        frac => SELECT char FROM
          IN['0..'9] => { }; -- extend fraction
          'E, 'e => { state ← exp1 }; -- exponent coming
          ENDCASE => GOTO TestForEnd; -- real with fraction ends here
        exp1 => SELECT char FROM
          '+, '- => { state ← exp2 }; -- exponent sign
          IN['0..'9] => { state ← exp3 }; -- first exponent digit
          ENDCASE => GOTO TestForEnd; -- make it a name
        exp2 => SELECT char FROM
          IN['0..'9] => { state ← exp3 }; -- first exponent digit
          ENDCASE => GOTO TestForEnd; -- make it a name
        exp3 => SELECT char FROM
          IN['0..'9] => { }; -- extend exponent
          ENDCASE => GOTO TestForEnd; -- real with exponent ends here
        comment => SELECT char FROM
          '\n => { token.type ← comment; EXIT }; -- end of comment
          ENDCASE => { }; -- skip
        ENDCASE => ERROR; -- unknown state
      EXITS
        ExtendString =>
          -- running off the end inside a string yields a truncated string token
          IF end THEN { token.truncated ← TRUE; token.type ← string; EXIT };
        TestForEnd =>
          IF class[char]=nil THEN state ← name -- if it doesn't end here, make it a name
          ELSE {
            -- the state reached so far decides the token type
            token.type ← SELECT state FROM
              int, oct => int, frac, exp3 => real, ENDCASE => name;
            IF NOT end THEN [] ← reader.Backwards[]; -- put the last character back
            EXIT;
            };
      };
    ENDLOOP;
  token.len ← reader.GetIndex[]-token.start;
  RETURN[token];
  };
ParseToken: PUBLIC PROC[token: Token, rope: ROPE] RETURNS[Any] = {
  -- Converts the text that token designates within rope into a value:
  -- int, real and name tokens go to ParseInt, ParseReal and ParseAtom;
  -- a string token yields its contents without the enclosing delimiters,
  -- with escapes removed when the token was marked as escaped.
  -- Any other token type raises ERROR.
  first: INT = token.start;
  count: INT = token.len;
  SELECT token.type FROM
    int => RETURN[ParseInt[rope, first, count]];
    real => RETURN[ParseReal[rope, first, count]];
    name => RETURN[ParseAtom[rope, first, count]];
    string => {
      -- a truncated string has only its opening delimiter to strip
      delimiters: INT = IF token.truncated THEN 1 ELSE 2;
      contents: ROPE = Rope.Substr[rope, first+1, count-delimiters];
      IF token.escaped THEN RETURN[RemoveEscapes[contents]];
      RETURN[contents];
      };
    ENDCASE => ERROR; -- unexpected token type
  };
ParseInt: PROC[rope: ROPE, start, len: INT] RETURNS[x: Any] = {
  -- Converts the len characters of rope beginning at start with
  -- Convert.IntFromRope and returns the result as a newly allocated INT.
  literal: ROPE = Rope.Substr[rope, start, len];
  value: INT = Convert.IntFromRope[literal];
  RETURN[NEW[INT ← value]];
  };
ParseReal: PROC[rope: ROPE, start, len: INT] RETURNS[Any] = {
  -- Converts the len characters of rope beginning at start with
  -- Convert.RealFromRope and returns the result as a newly allocated REAL.
  -- If the conversion reports a syntax error, it is retried once with "0"
  -- appended to the text (workaround, only until Convert is fixed).
  literal: ROPE = Rope.Substr[rope, start, len];
  {
    value: REAL;
    value ← Convert.RealFromRope[literal !
      Convert.Error => IF reason=syntax THEN GOTO Retry];
    RETURN[NEW[REAL ← value]];
    EXITS Retry => --only until Convert is fixed
      RETURN[NEW[REAL ← Convert.RealFromRope[Rope.Concat[literal, "0"]]]];
    };
  };
ParseAtom: PROC[rope: ROPE, start, len: INT] RETURNS[Any] = {
  -- Returns the atom made by Atom.MakeAtom from the len characters of rope
  -- beginning at start.
  spelling: ROPE = Rope.Substr[rope, start, len];
  RETURN[Atom.MakeAtom[spelling]];
  };
RemoveEscapes: PROC[text: ROPE] RETURNS[ROPE] = {
  -- Returns a copy of text in which each backslash escape is replaced by the
  -- character it denotes: n, N, r, R give '\n; t, b, f, l (either case) give
  -- the matching control character; one to three octal digits give the
  -- character with that code; any other escaped character stands for itself.
  size: INT = Rope.Length[text];
  next: INT ← 0; -- index of the next unread character of text
  produced: INT ← 0; -- number of result characters generated so far
  Next: PROC RETURNS[CHAR] = {
    -- consumes and returns the next character, or 0C when text is exhausted
    c: CHAR;
    IF next>=size THEN RETURN[0C];
    c ← Rope.Fetch[text, next];
    next ← next+1;
    RETURN[c];
    };
  Ahead: PROC RETURNS[CHAR] = {
    -- returns the next character without consuming it, or 0C at the end
    IF next>=size THEN RETURN[0C];
    RETURN[Rope.Fetch[text, next]];
    };
  Emit: PROC RETURNS[CHAR] = {
    -- produces one result character, or 0C once the input is used up
    c: CHAR;
    IF next>=size THEN RETURN[0C];
    c ← Next[];
    IF c='\\ THEN {
      c ← Next[];
      SELECT c FROM
        'n, 'N, 'r, 'R => c ← '\n;
        't, 'T => c ← '\t;
        'b, 'B => c ← '\b;
        'f, 'F => c ← '\f;
        'l, 'L => c ← '\l;
        IN['0..'7] => {
          -- accumulate up to three octal digits
          code: CARDINAL ← c-'0;
          IF Ahead[] IN['0..'7] THEN {
            code ← code*8 + Next[]-'0;
            IF Ahead[] IN['0..'7] THEN code ← code*8 + Next[]-'0;
            };
          c ← LOOPHOLE[code];
          };
        ENDCASE;
      };
    produced ← produced+1;
    RETURN[c];
    };
  -- Emit is called size times; calls made after the input is used up yield
  -- 0C filler, which is trimmed off below using the count of real characters.
  padded: ROPE = Rope.FromProc[size, Emit];
  RETURN[Rope.Substr[padded, 0, produced]];
  };
END.