JaMScannerImpl.mesa
Copyright Ó 1985, 1992 by Xerox Corporation. All rights reserved.
Original version by John Warnock, March 7, 1979
Paxton, 22-Jan-82 10:19:46
Maureen Stone February 14, 1984 4:04:32 pm PST
Doug Wyatt, March 18, 1985 3:30:36 pm PST
Bier, September 6, 1990 11:52 pm PDT
DIRECTORY
Convert USING [IntFromRope, RealFromRope, Error],
JaM USING [Any, Array, CountToMark, Execute, MakeArray, PopMark, Push, PushArray, PushMark, ROPE, RopeToAtom, State],
Rope USING [Fetch, FromProc, Length, Substr, Concat],
RopeReader USING [Backwards, GetRopeReader, FreeRopeReader, Get, GetIndex, GetRope, ReadOffEnd, Ref, SetPosition, Peek];
JaMScannerImpl: CEDAR PROGRAM
IMPORTS Convert, JaM, Rope, RopeReader
EXPORTS JaM
= BEGIN OPEN JaM;
-- Character classes used by the scanner to decide where tokens begin and end:
--   nil:  ordinary character; it can extend a name or number
--   toss: delimiter that is skipped between tokens
--   keep: delimiter that is itself significant (brace, paren, quote, percent)
Class: TYPE = {nil, toss, keep};
-- Lookup table giving the Class of every CHAR; allocated once by InitClassArray.
ClassArray: TYPE = REF ClassArrayRep;
ClassArrayRep: TYPE = PACKED ARRAY CHAR OF Class;
InitClassArray: PROC RETURNS[ClassArray] = {
  -- Allocates the character classification table used by GetToken.
  -- Every character starts out as nil (ordinary); the delimiters are then marked.
  table: ClassArray = NEW[ClassArrayRep ¬ ALL[nil]];
  -- delimiters that are skipped: null, carriage return, line feed, tab, space, comma
  table[0C] ¬ toss;
  table['\r] ¬ toss;
  table['\l] ¬ toss;
  table['\t] ¬ toss;
  table[' ] ¬ toss;
  table[',] ¬ toss;
  -- delimiters that are tokens, or that begin tokens, in their own right
  table['{] ¬ keep;
  table['}] ¬ keep;
  table['(] ¬ keep;
  table[')] ¬ keep;
  table['"] ¬ keep;
  table['%] ¬ keep;
  RETURN[table];
  };
-- The module's classification table, built once when the module is initialized; GetToken indexes it by character.
class: ClassArray = InitClassArray[];
-- Kinds of token reported by GetToken. nil means that no token was found before the input ended.
TokenType: TYPE = {nil, name, string, int, real, lbrace, rbrace, comment};
-- Description of one scanned token.
--   type:       what kind of token it is
--   truncated:  TRUE when the input ended inside a string literal
--   escaped:    TRUE when a backslash was seen inside a quoted string
--   start, len: position and length of the token's text, as measured by the reader's index;
--               for a string they include the opening delimiter and, unless truncated, the closing one
Token: TYPE = RECORD[type: TokenType, truncated, escaped: BOOL, start, len: INT];
-- Initial value for a token before anything has been scanned.
nullToken: Token = [type: nil, truncated: FALSE, escaped: FALSE, start: 0, len: 0];
-- States of the finite-state scanner in GetToken. The scanner starts in null and changes state
-- as each character is read; the state reached when a delimiter is met decides the token type.
ScanState: TYPE = {
null, -- nothing but delimiters so far
pstring, -- inside a parenthesized string literal
qstring, -- inside a quoted string literal
esc1, -- inside quoted string, after \
esc2, -- inside quoted string, 2nd char after \
esc3, -- inside quoted string, 3rd char after \
name, -- scanning a name
plus, -- after a single +
minus, -- after a single -
dot, -- after . or +. or -.
int, -- after a valid integer
oct, -- after a valid octal number
frac, -- after a valid real with fraction part
exp1, -- scanning a real, after E
exp2, -- scanning a real, after E+ or E-
exp3, -- after a valid real with exponent
comment -- skipping over a comment
};
GetToken: PROC[reader: RopeReader.Ref] RETURNS[Token] = {
  -- Scans one token from reader, beginning at its current position, and returns its type and extent.
  -- Leading delimiters (class toss) are skipped. A token of type nil is returned when the input
  -- ends before any token begins.
  -- The input ends either where reader.Get raises ReadOffEnd or at two consecutive 0C characters
  -- (the end of the unformatted part of a file). In both cases a token that is still in progress
  -- is finished as though a newline followed it; an unterminated string literal is returned
  -- with truncated=TRUE. The 0C marker is left unread, so later calls keep returning nil there.
  -- NOTE(review): finishing a token at the end of input relies on '\n being classed as a delimiter
  -- and on its matching '\r or '\l in the comment state; confirm for the target character set.
  -- NOTE(review): whether reader.Peek can raise ReadOffEnd when a lone 0C is the last character
  -- is not visible here; that call is unchanged.
  token: Token ¬ nullToken;
  state: ScanState ¬ null;
  pnest: INT ¬ 0; -- nesting depth of parens
  end: BOOL ¬ FALSE; -- TRUE once the input has ended and char is only a stand-in newline
  token.start ¬ reader.GetIndex[];
  DO char: CHAR;
    char ¬ reader.Get[! RopeReader.ReadOffEnd =>
      IF state=null THEN EXIT
      ELSE { end ¬ TRUE; char ¬ '\n; CONTINUE }];
    IF char=0C AND reader.Peek[]=0C THEN { -- end of unformatted part of file
      [] ¬ reader.Backwards[]; -- leave the marker unread so that later calls stop here as well
      IF state=null THEN EXIT; -- no token in progress
      end ¬ TRUE; char ¬ '\n; -- finish the pending token exactly as at the end of the rope
      };
    { SELECT state FROM
        null => IF class[char]=toss THEN token.start ¬ token.start+1 -- skip delimiters
          ELSE SELECT char FROM
            '{ => { token.type ¬ lbrace; EXIT };
            '} => { token.type ¬ rbrace; EXIT };
            '( => { state ¬ pstring; pnest ¬ 0 }; -- begin parenthesized string
            ') => { token.type ¬ name; EXIT }; -- treat unmatched ) as a name
            '" => { state ¬ qstring }; -- begin quoted string
            '+ => { state ¬ plus }; -- might begin number or name
            '- => { state ¬ minus }; -- might begin number or name
            '. => { state ¬ dot }; -- might begin real or name
            '% => { state ¬ comment }; -- begin comment
            IN['0..'9] => { state ¬ int }; -- begin integer
            ENDCASE => { state ¬ name }; -- begin name
        pstring => SELECT char FROM
          '( => pnest ¬ pnest+1; -- open
          ') => IF pnest>0 THEN pnest ¬ pnest-1 ELSE { token.type ¬ string; EXIT }; -- close
          ENDCASE => GOTO ExtendString; -- extend string
        qstring => SELECT char FROM
          '" => { token.type ¬ string; EXIT }; -- closing quote
          '\\ => { token.escaped ¬ TRUE; state ¬ esc1 }; -- begin escape sequence
          ENDCASE => GOTO ExtendString; -- extend string
        esc1 => SELECT char FROM
          IN['0..'9] => { state ¬ esc2 }; -- 1st of up to three digits
          ENDCASE => { state ¬ qstring; GOTO ExtendString }; -- other
        esc2 => SELECT char FROM
          IN['0..'9] => { state ¬ esc3 }; -- 2nd digit
          ENDCASE => { state ¬ qstring; GOTO ExtendString }; -- other
        esc3 => SELECT char FROM
          IN['0..'9] => { state ¬ qstring }; -- 3rd digit
          ENDCASE => { state ¬ qstring; GOTO ExtendString }; -- other
        name => GOTO TestForEnd; -- test for end of name
        plus => SELECT char FROM
          IN['0..'9] => { state ¬ int }; -- first integer digit
          '. => { state ¬ dot }; -- might start a real
          ENDCASE => GOTO TestForEnd; -- make it a name
        minus => SELECT char FROM
          IN['0..'9] => { state ¬ int }; -- first integer digit
          '. => { state ¬ dot }; -- might start a real
          ENDCASE => GOTO TestForEnd; -- make it a name
        dot => SELECT char FROM
          IN['0..'9] => { state ¬ frac }; -- first fraction digit
          ENDCASE => GOTO TestForEnd; -- no digits after dot
        int => SELECT char FROM
          IN['0..'9] => { }; -- extend integer
          '. => { state ¬ frac }; -- fraction coming
          'B, 'b => { state ¬ oct }; -- octal number
          'E, 'e => { state ¬ exp1 }; -- exponent coming
          ENDCASE => GOTO TestForEnd; -- integer ends here
        oct => GOTO TestForEnd; -- octal number ends here
        frac => SELECT char FROM
          IN['0..'9] => { }; -- extend fraction
          'E, 'e => { state ¬ exp1 }; -- exponent coming
          ENDCASE => GOTO TestForEnd; -- real with fraction ends here
        exp1 => SELECT char FROM
          '+, '- => { state ¬ exp2 }; -- exponent sign
          IN['0..'9] => { state ¬ exp3 }; -- first exponent digit
          ENDCASE => GOTO TestForEnd; -- make it a name
        exp2 => SELECT char FROM
          IN['0..'9] => { state ¬ exp3 }; -- first exponent digit
          ENDCASE => GOTO TestForEnd; -- make it a name
        exp3 => SELECT char FROM
          IN['0..'9] => { }; -- extend exponent
          ENDCASE => GOTO TestForEnd; -- real with exponent ends here
        comment => SELECT char FROM
          '\r, '\l => { token.type ¬ comment; EXIT }; -- end of comment
          ENDCASE => { }; -- skip
        ENDCASE => ERROR; -- unknown state
      EXITS
        ExtendString => -- an ordinary character inside a string literal
          IF end THEN { token.truncated ¬ TRUE; token.type ¬ string; EXIT };
        TestForEnd => -- the character may or may not terminate a name or number
          IF class[char]=nil THEN state ¬ name -- if it doesn't end here, make it a name
          ELSE {
            token.type ¬ SELECT state FROM
              int, oct => int, frac, exp3 => real, ENDCASE => name;
            IF NOT end THEN [] ¬ reader.Backwards[]; -- put the last character back
            EXIT;
            };
      };
    ENDLOOP;
  token.len ¬ reader.GetIndex[]-token.start;
  RETURN[token];
  };
ParseInt: PROC[rope: ROPE, start, len: INT] RETURNS[x: Any] = {
  -- Converts the len characters of rope beginning at start into a newly allocated INT.
  -- Any error raised by Convert.IntFromRope is passed on to the caller.
  digits: ROPE = Rope.Substr[rope, start, len];
  value: INT = Convert.IntFromRope[digits];
  RETURN[NEW[INT ¬ value]];
  };
ParseReal: PROC[rope: ROPE, start, len: INT] RETURNS[Any] = {
  -- Converts the len characters of rope beginning at start into a newly allocated REAL.
  -- If Convert reports a syntax error, the conversion is tried once more with a "0" appended
  -- (a workaround, only until Convert is fixed). An error from that second attempt,
  -- or any Convert.Error other than syntax, is passed on to the caller.
  {
    ENABLE Convert.Error => IF reason=syntax THEN GOTO Retry;
    text: ROPE = Rope.Substr[rope, start, len];
    RETURN[NEW[REAL ¬ Convert.RealFromRope[text]]];
    EXITS Retry => { -- only until Convert is fixed
      padded: ROPE = Rope.Concat[Rope.Substr[rope, start, len], "0"];
      RETURN[NEW[REAL ¬ Convert.RealFromRope[padded]]];
      };
    };
  };
ParseAtom: PROC[rope: ROPE, start, len: INT] RETURNS[Any] = {
  -- Passes the len characters of rope beginning at start to RopeToAtom and returns the result.
  text: ROPE = Rope.Substr[rope, start, len];
  RETURN[RopeToAtom[text]];
  };
RemoveEscapes: PROC[text: ROPE] RETURNS[ROPE] = {
  -- Returns a copy of text in which every backslash escape is replaced by the character it denotes.
  -- After a backslash: n r t b f l (in either case) give the corresponding control character,
  -- one to three octal digits give the character with that code,
  -- and any other character stands for itself.
  -- NOTE(review): an octal escape above 377 does not fit in a CHAR; what LOOPHOLE produces
  -- in that case is not visible from this file.
  size: INT = Rope.Length[text];
  next: INT ¬ 0; -- index of the next unread character of text
  produced: INT ¬ 0; -- number of result characters delivered so far
  Take: PROC RETURNS[c: CHAR] = { -- consume the next character; 0C once text is used up
    IF next>=size THEN RETURN[0C];
    c ¬ Rope.Fetch[text, next];
    next ¬ next+1;
    };
  Look: PROC RETURNS[CHAR] = { -- next character without consuming it; 0C once text is used up
    RETURN[IF next<size THEN Rope.Fetch[text, next] ELSE 0C];
    };
  Emit: PROC RETURNS[c: CHAR] = { -- delivers the next result character to Rope.FromProc
    -- FromProc asks for size characters; once text is used up the rest is filler, cut off below
    IF next>=size THEN RETURN[0C];
    c ¬ Take[];
    IF c='\\ THEN {
      c ¬ Take[];
      SELECT c FROM
        'n, 'N => c ¬ '\n;
        'r, 'R => c ¬ '\r;
        't, 'T => c ¬ '\t;
        'b, 'B => c ¬ '\b;
        'f, 'F => c ¬ '\f;
        'l, 'L => c ¬ '\l;
        IN['0..'7] => {
          d: CARDINAL ¬ c-'0;
          -- take at most two further octal digits
          THROUGH [0..2) WHILE Look[] IN['0..'7] DO d ¬ d*8 + Take[]-'0 ENDLOOP;
          c ¬ LOOPHOLE[d];
          };
        ENDCASE; -- any other character is kept as it is
      };
    produced ¬ produced+1;
    };
  unescaped: ROPE = Rope.FromProc[size, Emit];
  RETURN[Rope.Substr[unescaped, 0, produced]];
  };
ParseToken: PROC[token: Token, rope: ROPE] RETURNS[Any] = {
  -- Converts a scanned token into a JaM value, taking its text from rope.
  -- Handles int, real, name and string tokens; any other token type raises ERROR.
  SELECT token.type FROM
    int => RETURN[ParseInt[rope, token.start, token.len]];
    real => RETURN[ParseReal[rope, token.start, token.len]];
    name => RETURN[ParseAtom[rope, token.start, token.len]];
    string => {
      -- strip the opening delimiter, and the closing one unless the string was cut short
      delimiters: INT = IF token.truncated THEN 1 ELSE 2;
      body: ROPE = Rope.Substr[rope, token.start+1, token.len-delimiters];
      IF token.escaped THEN RETURN[RemoveEscapes[body]];
      RETURN[body];
      };
    ENDCASE => ERROR; -- unexpected token type
  };
GetReader: PROC[rope: ROPE] RETURNS[RopeReader.Ref] = {
  -- Obtains a reader from RopeReader and sets it to read rope.
  -- The caller hands the reader back with RopeReader.FreeRopeReader when finished.
  fresh: RopeReader.Ref = RopeReader.GetRopeReader[];
  RopeReader.SetPosition[fresh, rope];
  RETURN[fresh];
  };
ExecuteRope: PUBLIC PROC[self: State, rope: ROPE] = {
  -- Scans rope token by token and runs it on the JaM state self.
  -- Comments are ignored. Text in braces is collected into an array, which is pushed, not executed.
  -- An unmatched right brace at top level is executed as a name. Every other token is converted
  -- by ParseToken and executed.
  -- Malformed input is accepted silently: an array still open at the end of the rope is closed
  -- there, and a truncated string is executed with the text that was present.
  -- (A local flag that recorded these conditions was never read and has been removed.)
  reader: RopeReader.Ref = GetReader[rope];
  GetRestOfArray: PROC RETURNS[Array] = {
    -- Called just after a left brace has been scanned. Pushes a mark, pushes each element up to
    -- the matching right brace (nested braces recurse), then builds an array from the items above the mark.
    PushMark[self];
    DO token: Token = GetToken[reader];
      SELECT token.type FROM
        nil => EXIT; -- end of rope inside braces: close the array here
        comment => NULL; -- ignore comment
        lbrace => PushArray[self, GetRestOfArray[]];
        rbrace => EXIT;
        ENDCASE => Push[self, ParseToken[token, rope]];
      ENDLOOP;
    { length: INT = CountToMark[self];
      array: Array = MakeArray[self, length];
      PopMark[self];
      RETURN[array];
      };
    };
  DO token: Token = GetToken[reader];
    SELECT token.type FROM
      nil => EXIT; -- end of rope
      comment => NULL; -- ignore comment
      lbrace => PushArray[self, GetRestOfArray[]];
      rbrace => Execute[self, ParseAtom[rope, token.start, token.len]];
      ENDCASE => Execute[self, ParseToken[token, rope]];
    ENDLOOP;
  RopeReader.FreeRopeReader[reader];
  };
LineComplete: PUBLIC PROC[text: ROPE] RETURNS[BOOL] = {
  -- Reports whether text can be scanned without ending inside a construct.
  -- Returns FALSE when a string literal is cut short by the end of text or when a left brace
  -- has no matching right brace; returns TRUE otherwise. Nothing is parsed or executed.
  reader: RopeReader.Ref = GetReader[text];
  unclosed: BOOL ¬ FALSE; -- set when text ends inside braces
  complete: BOOL ¬ TRUE;
  SkipArray: PROC = {
    -- Called just after a left brace: reads tokens up to the matching right brace.
    DO tok: Token = GetToken[reader];
      IF tok.type=rbrace THEN EXIT;
      IF tok.type=nil THEN { unclosed ¬ TRUE; EXIT }; -- ran out of text before the closing brace
      IF tok.type=lbrace THEN SkipArray[];
      ENDLOOP;
    };
  DO tok: Token = GetToken[reader];
    IF tok.type=nil THEN EXIT; -- end of rope
    IF tok.type=lbrace THEN SkipArray[];
    IF tok.truncated OR unclosed THEN { complete ¬ FALSE; EXIT };
    ENDLOOP;
  RopeReader.FreeRopeReader[reader];
  RETURN[complete];
  };
END.