PSScannerImpl.mesa
Copyright Ó 1987 by Xerox Corporation. All rights reserved.
Doug Wyatt, August 11, 1987 7:14:41 pm PDT
DIRECTORY
PS,
Ascii,
Convert,
IO,
RefText;
PSScannerImpl: CEDAR PROGRAM
IMPORTS PS, Convert, IO, RefText
EXPORTS PS
~ BEGIN OPEN PS;
Class:
TYPE ~ {
newLine, -- CR, LF
space, -- SP, TAB
openParen, -- '(
closeParen, -- ')
openBracket, -- '[
closeBracket, -- ']
openAngle, -- '<
closeAngle, -- '>
openBrace, -- '{
closeBrace, -- '}
slash, -- '/
percent, -- '%
dot, -- '.
hash, -- '#
sign, -- '+, '-
digitOctal, -- IN ['0..'7]
digitOther, -- IN ['0..'9] (other than digitOctal)
letterE, -- 'E, 'e
letterHex, -- IN ['A..'F], IN ['a..'f] (other than letterE)
letterOther, -- IN ['A..'Z], IN ['a..'z] (other than letterE, letterHex)
backslash, -- '\
other -- everything else
};
WhiteSpace: TYPE ~ Class[newLine..space];
Special: TYPE ~ Class[openParen..percent];
Regular: TYPE ~ Class[dot..other];
DecimalDigit: TYPE ~ Class[digitOctal..digitOther]; -- ['0..'9]
HexDigit: TYPE ~ Class[digitOctal..letterHex]; -- ['0..'9], ['A..'F], ['a..'f]
RadixDigit:
TYPE ~ Class[digitOctal..letterOther];
-- ['0..'9], ['A..'Z], ['a..'z]
ClassArray:
TYPE ~
PACKED
ARRAY
CHAR
OF Class;
InitClassArray:
PROC
RETURNS [array:
REF ClassArray] ~ {
array ← NEW [ClassArray];
FOR char:
CHAR
IN
CHAR
DO
array[char] ←
SELECT char
FROM
Ascii.CR, Ascii.LF => newLine,
Ascii.SP, Ascii.TAB => space,
'( => openParen,
') => closeParen,
'[ => openBracket,
'] => closeBracket,
'< => openAngle,
'> => closeAngle,
'{ => openBrace,
'} => closeBrace,
'/ => slash,
'% => percent,
'. => dot,
'# => hash,
'+, '- => sign,
IN ['0..'7] => digitOctal,
IN ['0..'9] => digitOther,
'E, 'e => letterE,
IN ['A..'F], IN ['a..'f] => letterHex,
IN ['A..'Z], IN ['a..'z] => letterOther,
'\\ => backslash,
ENDCASE => other;
ENDLOOP;
};
classFromChar:
REF ClassArray ~ InitClassArray[];
DigitArray:
TYPE ~
PACKED
ARRAY
CHAR
OF Digit;
InitDigitArray:
PROC
RETURNS [array:
REF DigitArray] ~ {
array ← NEW [DigitArray];
FOR char:
CHAR
IN
CHAR
DO
array[char] ←
SELECT char
FROM
IN ['0..'9] => char-'0,
IN ['A..'Z] => 10+(char-'A),
IN ['a..'z] => 10+(char-'a),
ENDCASE => 0;
ENDLOOP;
};
digit:
REF DigitArray ~ InitDigitArray[];
EscapeArray:
TYPE ~
PACKED
ARRAY
CHAR
OF
CHAR;
InitEscapeArray:
PROC
RETURNS [array:
REF EscapeArray] ~ {
array ← NEW [EscapeArray];
FOR char:
CHAR
IN
CHAR
DO
array[char] ←
SELECT char
FROM
'n => Ascii.LF,
'r => Ascii.CR,
't => Ascii.TAB,
'b => Ascii.BS,
'f => Ascii.FF,
IN ['0..'7] => VAL[digit[char]],
ENDCASE => char;
ENDLOOP;
};
escape:
REF EscapeArray ~ InitEscapeArray[];
State:
TYPE ~ {
empty, -- only whitespace so far
sign, -- + | -
int0, -- digit | int0 digit a valid integer (without sign)
int1, -- sign digit | int1 digit a valid integer (with sign)
real0, -- . | sign .
real1, -- ( int0 | int1 ) . | real0 digit | real1 digit a valid real (without exponent)
real2, -- ( int0 | int1 | real1 ) ( e | E )
real3, -- real2 ( + | - )
real4, -- ( real2 | real3 ) digit | real4 digit a valid real (with exponent)
radix0, -- int0 #
radix1, -- radix0 rdigit | radix1 rdigit a valid radix number
slash, -- /
name, -- char | slash char | // | name char a sequence of regular chars
string0, -- ( ...
string1, -- ( ... \
string2, -- ( ... \d
string3, -- ( ... \dd
hex0, -- < ... even number of digits
hex1, -- < ... odd number of digits
comment, -- % ...
string, -- ( ... )
hex, -- < ... >
proc0, -- {
proc1, -- }
error -- syntax error
};
Action:
TYPE ~ {
skip, -- ignore char
putBack, -- return char to input source
append, -- append char to text buffer
noteSlash, -- note slash preceding name (ignore '/ and increment slash count)
stringStart, -- start string literal (ignore '( and start with next char)
stringChar, -- include char in string literal
escapeStart, -- begin escape sequence in string literal with '\
escapeChar, -- translate escape char following '\
escapeDigit, -- include another octal digit in escape sequence
hexDigit1, -- char is first hex digit
hexDigit2, -- char is second hex digit, append byte
hexFill -- use zero for missing second digit and append byte
};
TransitionResult:
TYPE ~
RECORD [action: Action, state: State, stop:
BOOL ←
FALSE];
Transition:
PROC [state: State, class: Class]
RETURNS [TransitionResult] ~ {
SELECT state
FROM
empty => {
IF class IN WhiteSpace THEN RETURN [[action: skip, state: empty]];
IF class
IN Special
THEN
SELECT class
FROM
openBrace => RETURN [[action: skip, state: proc0, stop: TRUE]];
closeBrace => RETURN [[action: skip, state: proc1, stop: TRUE]];
openBracket, closeBracket => RETURN [[action: append, state: name, stop: TRUE]];
openParen => RETURN [[action: stringStart, state: string0]];
openAngle => RETURN [[action: skip, state: hex0]];
slash => RETURN [[action: noteSlash, state: slash]];
percent => RETURN [[action: skip, state: comment]];
ENDCASE => RETURN [[action: append, state: error, stop: TRUE]];
};
IN[sign..name] =>
IF state=slash
AND class=slash
THEN
NULL
ELSE {
IF class IN WhiteSpace THEN RETURN [[action: skip, state: state, stop: TRUE]];
IF class IN Special THEN RETURN [[action: putBack, state: state, stop: TRUE]];
};
IN[hex0..hex1] => {
IF class IN WhiteSpace THEN RETURN [[action: skip, state: state]];
};
ENDCASE;
SELECT state
FROM
empty =>
SELECT class
FROM
sign => RETURN [[action: append, state: sign]];
dot => RETURN [[action: append, state: real0]];
IN DecimalDigit => RETURN [[action: append, state: int0]];
ENDCASE => RETURN [[action: append, state: name]];
sign =>
SELECT class
FROM
dot => RETURN [[action: append, state: real0]];
IN DecimalDigit => RETURN [[action: append, state: int1]];
ENDCASE => RETURN [[action: append, state: name]];
int0 =>
SELECT class
FROM
dot => RETURN [[action: append, state: real1]];
letterE => RETURN [[action: append, state: real2]];
hash => RETURN [[action: append, state: radix0]];
IN DecimalDigit => RETURN [[action: append, state: int0]];
ENDCASE => RETURN [[action: append, state: name]];
int1 =>
SELECT class
FROM
dot => RETURN [[action: append, state: real1]];
letterE => RETURN [[action: append, state: real2]];
IN DecimalDigit => RETURN [[action: append, state: int1]];
ENDCASE => RETURN [[action: append, state: name]];
real0 =>
SELECT class
FROM
IN DecimalDigit => RETURN [[action: append, state: real1]];
ENDCASE => RETURN [[action: append, state: name]];
real1 =>
SELECT class
FROM
letterE => RETURN [[action: append, state: real2]];
IN DecimalDigit => RETURN [[action: append, state: real1]];
ENDCASE => RETURN [[action: append, state: name]];
real2 =>
SELECT class
FROM
sign => RETURN [[action: append, state: real3]];
IN DecimalDigit => RETURN [[action: append, state: real4]];
ENDCASE => RETURN [[action: append, state: name]];
real3 =>
SELECT class
FROM
IN DecimalDigit => RETURN [[action: append, state: real4]];
ENDCASE => RETURN [[action: append, state: name]];
real4 =>
SELECT class
FROM
IN DecimalDigit => RETURN [[action: append, state: real4]];
ENDCASE => RETURN [[action: append, state: name]];
radix0 =>
SELECT class
FROM
IN RadixDigit => RETURN [[action: append, state: radix1]];
ENDCASE => RETURN [[action: append, state: name]];
radix1 =>
SELECT class
FROM
IN RadixDigit => RETURN [[action: append, state: radix1]];
ENDCASE => RETURN [[action: append, state: name]];
slash =>
SELECT class
FROM
slash => RETURN [[action: noteSlash, state: name]];
ENDCASE => RETURN [[action: append, state: name]];
name => RETURN [[action: append, state: name]];
string0 =>
SELECT class
FROM
closeParen => RETURN [[action: skip, state: string, stop: TRUE]];
backslash => RETURN [[action: escapeStart, state: string1]];
ENDCASE => RETURN [[action: stringChar, state: string0]];
string1 =>
SELECT class
FROM
newLine => RETURN [[action: skip, state: string0]];
digitOctal => RETURN [[action: escapeChar, state: string2]];
ENDCASE => RETURN [[action: escapeChar, state: string0]];
string2 =>
SELECT class
FROM
closeParen => RETURN [[action: skip, state: string, stop: TRUE]];
digitOctal => RETURN [[action: escapeDigit, state: string3]];
ENDCASE => RETURN [[action: stringChar, state: string0]];
string3 =>
SELECT class
FROM
digitOctal => RETURN [[action: escapeDigit, state: string0]];
ENDCASE => RETURN [[action: stringChar, state: string0]];
hex0 =>
SELECT class
FROM
closeAngle => RETURN [[action: skip, state: hex, stop: TRUE]];
IN HexDigit => RETURN [[action: hexDigit1, state: hex1]];
ENDCASE => RETURN [[action: skip, state: error, stop: TRUE]];
hex1 =>
SELECT class
FROM
closeAngle => RETURN [[action: hexFill, state: hex, stop: TRUE]];
IN HexDigit => RETURN [[action: hexDigit2, state: hex0]];
ENDCASE => RETURN [[action: skip, state: error, stop: TRUE]];
comment =>
SELECT class
FROM
newLine => RETURN [[action: skip, state: empty]];
ENDCASE => RETURN [[action: skip, state: comment]];
ENDCASE => RETURN [[action: skip, state: error, stop: TRUE]];
};
TransitionTable: TYPE ~ ARRAY State OF REF TransitionArray;
TransitionArray:
TYPE ~
ARRAY Class
OF TransitionResult;
InitTransitionTable:
PROC
RETURNS [table:
REF TransitionTable] ~ {
table ← NEW [TransitionTable];
FOR state: State
IN State
DO
array: REF TransitionArray ~ NEW [TransitionArray];
FOR class: Class
IN Class
DO
array[class] ← Transition[state, class];
ENDLOOP;
table[state] ← array;
ENDLOOP;
};
transition:
REF TransitionTable ~ InitTransitionTable[];
TokenInt:
PROC [text:
REF
READONLY
TEXT]
RETURNS [Any] ~ {
RETURN [AnyFromInt[Convert.IntFromRope[RefText.TrustTextAsRope[text]
! Convert.Error => GOTO Overflow]]];
EXITS Overflow => RETURN [TokenReal[text]];
};
TokenReal:
PROC [text:
REF
READONLY
TEXT]
RETURNS [Any] ~ {
RETURN [AnyFromReal[Convert.RealFromRope[RefText.TrustTextAsRope[text]
! Convert.Error => GOTO Overflow]]];
EXITS Overflow => RETURN [TokenName[text]];
};
RClass:
TYPE ~ {
digit, -- IN ['0..'9]
sign, -- '+, '-
dot, -- '.
letterE, -- 'E, 'e
other -- everything else
};
RClassArray:
TYPE ~
PACKED
ARRAY
CHAR
OF RClass;
InitRClassArray:
PROC
RETURNS [array:
REF RClassArray] ~ {
array ← NEW [RClassArray];
FOR char:
CHAR
IN
CHAR
DO
array[char] ←
SELECT char
FROM
IN ['0..'9] => digit,
'+, '- => sign,
'. => dot,
'E, 'e => letterE,
ENDCASE => other;
ENDLOOP;
};
rclassFromChar:
REF RClassArray ~ InitRClassArray[];
RState:
TYPE ~ {
empty, -- only whitespace so far
sign, -- + | -
int, -- ( empty | sign ) digit | int digit a valid integer
real0, -- . | sign .
real1, -- int . | real0 digit | real1 digit a valid real (without exponent)
real2, -- ( int | real1 ) ( e | E )
real3, -- real2 ( + | - )
real4, -- ( real2 | real3 ) digit | real4 digit a valid real (with exponent)
error -- syntax error
};
RealFromText: PROC [text: REF READONLY TEXT] RETURNS [REAL] ~ {
state: RState ← empty;
fr: INT ← 0; -- mantissa
exp, adj: INTEGER ← 0; -- exponent and adjustment
mNeg, eNeg: BOOL ← FALSE;
mDigits, eDigits: NAT ← 0; -- significant digits
FOR i: NAT IN[0..text.length) DO
char: CHAR ~ text[i];
class: RClass ~ rclassFromChar[char];
SELECT state FROM
empty => SELECT class FROM
sign => { state ← sign; action ← msign };
digit => { state ← int; action ← mdigit };
dot => state ← real0;
ENDCASE => GOTO Fail;
sign => SELECT class FROM
digit => { state ← int; action ← mdigit };
dot => state ← real0;
ENDCASE => GOTO Fail;
int => SELECT class FROM
digit => action ← mdigit;
dot => state ← real1;
letterE => state ← real2;
ENDCASE => GOTO Fail;
real0 => SELECT class FROM
digit => { state ← real1; action ← mdigit };
ENDCASE => GOTO Fail;
real1 => SELECT class FROM
digit => action ← mdigit;
'e, 'E => state ← real2;
ENDCASE => GOTO Fail;
real2 => SELECT class FROM
sign => { state ← real3; action ← esign };
digit => { state ← real4; action ← edigit };
ENDCASE => GOTO Fail;
real3 => SELECT class FROM
digit => { state ← real4; action ← edigit };
ENDCASE => GOTO Fail;
real4 => SELECT c FROM
digit => action ← edigit;
ENDCASE => GOTO Fail;
ENDCASE => GOTO Fail;
SELECT action FROM
msign => mNeg ← (char='-);
mdigit => { d: [0..10) ~ char-'0;
IF state=frac THEN adj ← adj-1;
IF mDigits=0 AND d=0 THEN NULL -- leading zero
ELSE IF mDigits<9 THEN { fr ← fr*10+d; mDigits ← mDigits+1 }
ELSE { adj ← adj+1; IF mDigits=9 AND d>5 THEN fr ← fr+1; }; -- round if 10th digit >5
};
esign => eNeg ← (char='-);
edigit => { d: [0..10) ~ char-'0;
IF eDigits=0 AND d=0 THEN NULL -- leading zero
ELSE IF eDigits<3 THEN { exp ← exp*10+d; eDigits ← eDigits+1 }
ELSE ERROR Error[reason: $overflow];
};
ENDCASE;
REPEAT Fail => ERROR;
ENDLOOP;
SELECT state FROM
int, real1, real4 => NULL; -- ok
ENDCASE => ERROR Error[reason: $syntax];
IF eNeg THEN exp ← -exp;
real ← Real.PairToReal[fr: fr, exp10: exp+adj ! Real.RealException => RESUME];
IF mNeg THEN real ← -real;
};
TokenRadix:
PROC [text:
REF
READONLY
TEXT]
RETURNS [Any] ~ {
state: {radix, val} ← radix;
radix: NAT ← 0;
val, limit: CARD ← 0;
FOR i:
NAT
IN [0..text.length)
DO
char: CHAR ~ text[i];
d: Digit ~ digit[char];
SELECT state
FROM
radix =>
SELECT char
FROM
IN ['0..'9] => {
radix ← radix*10+d;
IF radix>36 THEN GOTO Bogus;
};
'# => {
IF radix NOT IN [2..36] THEN GOTO Bogus;
limit ← CARD.LAST/radix;
state ← val;
};
ENDCASE => GOTO Bogus;
val => {
IF NOT d<radix THEN GOTO Bogus;
IF val>limit THEN GOTO Bogus ELSE val ← val*radix;
IF d>(CARD.LAST-val) THEN GOTO Bogus ELSE val ← val+d;
};
ENDCASE => ERROR;
ENDLOOP;
RETURN [AnyFromInt[LOOPHOLE[val]]];
EXITS Bogus => RETURN [TokenName[text]];
};
TokenName:
PROC [text:
REF
READONLY
TEXT]
RETURNS [Any] ~ {
RETURN [AnyFromName[NameFromText[text]]];
};
TokenString:
PROC [text:
REF
TEXT]
RETURNS [Any] ~ {
RETURN [AnyFromString[StringFromText[StringCreate[text.length], text]]];
};
TokenProc:
PROC [self: Root]
RETURNS [Any] ~ {
size: INT ~ CountToMark[self];
array: Array ~ ArrayCreate[size];
AStore[self, array];
PopMark[self];
RETURN [CvX[AnyFromArray[array]]];
};
FileToken:
PUBLIC
PROC [self: Root, file: File]
RETURNS [found:
BOOL, token: Any] ~ {
stream: IO.STREAM ~ file.ref;
text: REF TEXT ← self.buffer;
maxLength: NAT ← text.maxLength;
nest: INT ← 0; -- procedure nesting level
DO
-- for each token in procedure
state: State ← empty;
slashes: [0..2] ← 0; -- number of slashes preceding name
val: CARDINAL ← 0;
length: NAT ← 0; -- text length
ExpandText:
PROC ~ {
text.length ← length; text ← RefText.ReserveChars[text, 1]; maxLength ← text.maxLength;
};
Append:
PROC [char:
CHAR] ~
INLINE {
IF length=maxLength THEN ExpandText[];
text[length] ← char; length ← length+1;
};
DO
-- for each character
char:
CHAR ~
IO.GetChar[stream !
IO.EndOfStream => EXIT;
IO.Error => IF ec=StreamClosed THEN EXIT ELSE GOTO IOError;
];
result: TransitionResult ~ transition[state][classFromChar[char]];
state ← result.state;
SELECT result.action
FROM
skip, stringStart, escapeStart => NULL;
putBack => IO.Backup[stream, char];
noteSlash => slashes ← slashes+1;
append, stringChar => Append[char];
escapeChar => Append[escape[char]];
escapeDigit => text[length-1] ← VAL[(ORD[text[length-1]]*8+digit[char]) MOD 256];
hexDigit1 => val ← digit[char];
hexDigit2 => Append[VAL[val*16+digit[char]]];
hexFill => Append[VAL[val*16]];
ENDCASE => ERROR Bug;
IF result.stop THEN EXIT;
ENDLOOP;
text.length ← length;
SELECT state
FROM
empty, comment => { token ← null; EXIT };
int0, int1 => token ← TokenInt[text];
real1, real4 => token ← TokenReal[text];
radix1 => token ← TokenRadix[text];
sign, real0, real2, real3, radix0, slash, name => token ← TokenName[text];
string, hex => token ← TokenString[text];
proc0 => { token ← mark; nest ← nest+1 };
proc1 => { IF nest>0 THEN token ← TokenProc[self]; nest ← nest-1 };
ENDCASE => ERROR Error[syntaxerror];
SELECT slashes
FROM
1 => token ← CvLit[token]; -- literal name
2 => token ← Load[self, token]; -- immediately evaluated name
ENDCASE;
IF nest>0 THEN PushAny[self, token] ELSE EXIT;
ENDLOOP;
IF nest#0 THEN Error[syntaxerror];
found ← Type[token]#null;
EXITS IOError => ERROR Error[ioerror];
};
StringToken:
PUBLIC
PROC [self: Root, string: String]
RETURNS [found:
BOOL, token: Any, post: String] ~ {
text: REF TEXT ← IF self=NIL THEN NIL ELSE self.buffer;
maxLength: NAT ← IF text=NIL THEN 0 ELSE text.maxLength;
nest: INT ← 0; -- procedure nesting level
stringLength: StringIndex ~ StringLength[string];
index: StringIndex ← 0;
DO
-- for each token in procedure
state: State ← empty;
slashes: [0..2] ← 0; -- number of slashes preceding name
val: CARDINAL ← 0;
length: NAT ← 0;
start, stop: StringIndex ← 0;
ExpandText:
PROC ~ {
IF text=NIL THEN text ← RefText.New[20];
text.length ← length; text ← RefText.ReserveChars[text, 1]; maxLength ← text.maxLength;
};
Append:
PROC [char:
CHAR] ~
INLINE {
IF length=maxLength THEN ExpandText[];
text[length] ← char; length ← length+1;
};
WHILE index<stringLength
DO
-- for each character
char: CHAR ~ string.ref[string.val.start+index];
result: TransitionResult ~ transition[state][classFromChar[char]];
index ← index+1;
state ← result.state;
SELECT result.action
FROM
skip => NULL;
putBack => index ← index-1;
noteSlash => slashes ← slashes+1;
stringStart => start ← stop ← index;
stringChar => stop ← index;
escapeStart => { stop ← index; state ← string0 }; -- no escape convention in strings
append => Append[char];
hexDigit1 => val ← digit[char];
hexDigit2 => Append[VAL[val*16+digit[char]]];
hexFill => Append[VAL[val*16]];
ENDCASE => ERROR Bug;
IF result.stop THEN EXIT;
ENDLOOP;
text.length ← length;
SELECT state
FROM
empty, comment => { token ← null; EXIT };
int0, int1 => token ← TokenInt[text];
real1, real4 => token ← TokenReal[text];
radix1 => token ← TokenRadix[text];
sign, real0, real2, real3, radix0, slash, name => token ← TokenName[text];
string => token ← CvLit[AnyFromString[StringGetInterval[string, start, stop-start]]];
hex => token ← TokenString[text];
proc0 => { token ← mark; nest ← nest+1 };
proc1 => { IF nest>0 THEN token ← TokenProc[self]; nest ← nest-1 };
ENDCASE => ERROR Error[syntaxerror];
SELECT slashes
FROM
1 => token ← CvLit[token]; -- literal name
2 => token ← Load[self, token]; -- immediately evaluated name
ENDCASE;
IF nest>0 AND self#NIL THEN PushAny[self, token] ELSE EXIT;
ENDLOOP;
IF nest#0 THEN Error[syntaxerror];
found ← Type[token]#null;
post ← StringGetInterval[string, index, stringLength-index];
};
END.