PSScannerImpl.mesa
Copyright Ó 1987 by Xerox Corporation. All rights reserved.
Doug Wyatt, August 20, 1987 4:32:43 pm PDT
DIRECTORY
PS,
Ascii,
Convert,
IO,
Real,
RefText;
PSScannerImpl: CEDAR PROGRAM
IMPORTS PS, Convert, IO, Real, RefText
EXPORTS PS
~ BEGIN OPEN PS;
Class: TYPE ~ {
newLine, -- CR, LF
space, -- SP, TAB
openParen, -- '(
closeParen, -- ')
openBracket, -- '[
closeBracket, -- ']
openAngle, -- '<
closeAngle, -- '>
openBrace, -- '{
closeBrace, -- '}
slash, -- '/
percent, -- '%
dot, -- '.
hash, -- '#
sign, -- '+, '-
digitOctal, -- IN ['0..'7]
digitOther, -- IN ['0..'9] (other than digitOctal)
letterE, -- 'E, 'e
letterHex, -- IN ['A..'F], IN ['a..'f] (other than letterE)
letterOther, -- IN ['A..'Z], IN ['a..'z] (other than letterE, letterHex)
backslash, -- '\
other -- everything else
};
WhiteSpace: TYPE ~ Class[newLine..space];
Special: TYPE ~ Class[openParen..percent];
Regular: TYPE ~ Class[dot..other];
DecimalDigit: TYPE ~ Class[digitOctal..digitOther]; -- ['0..'9]
HexDigit: TYPE ~ Class[digitOctal..letterHex]; -- ['0..'9], ['A..'F], ['a..'f]
RadixDigit: TYPE ~ Class[digitOctal..letterOther]; -- ['0..'9], ['A..'Z], ['a..'z]
ClassArray: TYPE ~ PACKED ARRAY CHAR OF Class;
InitClassArray: PROC RETURNS [array: REF ClassArray] ~ {
array ← NEW [ClassArray];
FOR char: CHAR IN CHAR DO
array[char] ← SELECT char FROM
Ascii.CR, Ascii.LF => newLine,
Ascii.SP, Ascii.TAB => space,
'( => openParen,
') => closeParen,
'[ => openBracket,
'] => closeBracket,
'< => openAngle,
'> => closeAngle,
'{ => openBrace,
'} => closeBrace,
'/ => slash,
'% => percent,
'. => dot,
'# => hash,
'+, '- => sign,
IN ['0..'7] => digitOctal,
IN ['0..'9] => digitOther,
'E, 'e => letterE,
IN ['A..'F], IN ['a..'f] => letterHex,
IN ['A..'Z], IN ['a..'z] => letterOther,
'\\ => backslash,
ENDCASE => other;
ENDLOOP;
};
classFromChar: REF ClassArray ~ InitClassArray[];
Digit: TYPE ~ [0..36);
DigitArray: TYPE ~ PACKED ARRAY CHAR OF Digit;
InitDigitArray: PROC RETURNS [array: REF DigitArray] ~ {
array ← NEW [DigitArray];
FOR char: CHAR IN CHAR DO
array[char] ← SELECT char FROM
IN ['0..'9] => char-'0,
IN ['A..'Z] => 10+(char-'A),
IN ['a..'z] => 10+(char-'a),
ENDCASE => 0;
ENDLOOP;
};
digit: REF DigitArray ~ InitDigitArray[];
EscapeArray: TYPE ~ PACKED ARRAY CHAR OF CHAR;
InitEscapeArray: PROC RETURNS [array: REF EscapeArray] ~ {
array ← NEW [EscapeArray];
FOR char: CHAR IN CHAR DO
array[char] ← SELECT char FROM
'n => Ascii.LF,
'r => Ascii.CR,
't => Ascii.TAB,
'b => Ascii.BS,
'f => Ascii.FF,
IN ['0..'7] => VAL[digit[char]],
ENDCASE => char;
ENDLOOP;
};
escape: REF EscapeArray ~ InitEscapeArray[];
State: TYPE ~ {
empty, -- only whitespace so far
sign, -- + | -
int0, -- digit | int0 digit a valid integer (without sign)
int1, -- sign digit | int1 digit a valid integer (with sign)
real0, -- . | sign .
real1, -- ( int0 | int1 ) . | real0 digit | real1 digit a valid real (without exponent)
real2, -- ( int0 | int1 | real1 ) ( e | E )
real3, -- real2 ( + | - )
real4, -- ( real2 | real3 ) digit | real4 digit a valid real (with exponent)
radix0, -- int0 #
radix1, -- radix0 rdigit | radix1 rdigit a valid radix number
slash, -- /
name, -- char | slash char | // | name char a sequence of regular chars
string0, -- ( ...
string1, -- ( ... \
string2, -- ( ... \d
string3, -- ( ... \dd
hex0, -- < ... even number of digits
hex1, -- < ... odd number of digits
comment, -- % ...
string, -- ( ... )
hex, -- < ... >
proc0, -- {
proc1, -- }
error -- syntax error
};
Action: TYPE ~ {
skip, -- ignore char
putBack, -- return char to input source
append, -- append char to text buffer
noteSlash, -- note slash preceding name (ignore '/ and increment slash count)
stringStart, -- start string literal (ignore '( and start with next char)
stringChar, -- include char in string literal
escapeStart, -- begin escape sequence in string literal with '\
escapeChar, -- translate escape char following '\
escapeDigit, -- include another octal digit in escape sequence
hexDigit1, -- char is first hex digit
hexDigit2, -- char is second hex digit, append byte
hexFill -- use zero for missing second digit and append byte
};
TransitionResult: TYPE ~ RECORD [action: Action, state: State, stop: BOOLFALSE];
Transition: PROC [state: State, class: Class] RETURNS [TransitionResult] ~ {
SELECT state FROM
empty => {
IF class IN WhiteSpace THEN RETURN [[action: skip, state: empty]];
IF class IN Special THEN SELECT class FROM
openBrace => RETURN [[action: skip, state: proc0, stop: TRUE]];
closeBrace => RETURN [[action: skip, state: proc1, stop: TRUE]];
openBracket, closeBracket => RETURN [[action: append, state: name, stop: TRUE]];
openParen => RETURN [[action: stringStart, state: string0]];
openAngle => RETURN [[action: skip, state: hex0]];
slash => RETURN [[action: noteSlash, state: slash]];
percent => RETURN [[action: skip, state: comment]];
ENDCASE => RETURN [[action: append, state: error, stop: TRUE]];
};
IN[sign..name] => IF state=slash AND class=slash THEN NULL ELSE {
IF class IN WhiteSpace THEN RETURN [[action: skip, state: state, stop: TRUE]];
IF class IN Special THEN RETURN [[action: putBack, state: state, stop: TRUE]];
};
IN[hex0..hex1] => {
IF class IN WhiteSpace THEN RETURN [[action: skip, state: state]];
};
ENDCASE;
SELECT state FROM
empty => SELECT class FROM
sign => RETURN [[action: append, state: sign]];
dot => RETURN [[action: append, state: real0]];
IN DecimalDigit => RETURN [[action: append, state: int0]];
ENDCASE => RETURN [[action: append, state: name]];
sign => SELECT class FROM
dot => RETURN [[action: append, state: real0]];
IN DecimalDigit => RETURN [[action: append, state: int1]];
ENDCASE => RETURN [[action: append, state: name]];
int0 => SELECT class FROM
dot => RETURN [[action: append, state: real1]];
letterE => RETURN [[action: append, state: real2]];
hash => RETURN [[action: append, state: radix0]];
IN DecimalDigit => RETURN [[action: append, state: int0]];
ENDCASE => RETURN [[action: append, state: name]];
int1 => SELECT class FROM
dot => RETURN [[action: append, state: real1]];
letterE => RETURN [[action: append, state: real2]];
IN DecimalDigit => RETURN [[action: append, state: int1]];
ENDCASE => RETURN [[action: append, state: name]];
real0 => SELECT class FROM
IN DecimalDigit => RETURN [[action: append, state: real1]];
ENDCASE => RETURN [[action: append, state: name]];
real1 => SELECT class FROM
letterE => RETURN [[action: append, state: real2]];
IN DecimalDigit => RETURN [[action: append, state: real1]];
ENDCASE => RETURN [[action: append, state: name]];
real2 => SELECT class FROM
sign => RETURN [[action: append, state: real3]];
IN DecimalDigit => RETURN [[action: append, state: real4]];
ENDCASE => RETURN [[action: append, state: name]];
real3 => SELECT class FROM
IN DecimalDigit => RETURN [[action: append, state: real4]];
ENDCASE => RETURN [[action: append, state: name]];
real4 => SELECT class FROM
IN DecimalDigit => RETURN [[action: append, state: real4]];
ENDCASE => RETURN [[action: append, state: name]];
radix0 => SELECT class FROM
IN RadixDigit => RETURN [[action: append, state: radix1]];
ENDCASE => RETURN [[action: append, state: name]];
radix1 => SELECT class FROM
IN RadixDigit => RETURN [[action: append, state: radix1]];
ENDCASE => RETURN [[action: append, state: name]];
slash => SELECT class FROM
slash => RETURN [[action: noteSlash, state: name]];
ENDCASE => RETURN [[action: append, state: name]];
name => RETURN [[action: append, state: name]];
string0 => SELECT class FROM
closeParen => RETURN [[action: skip, state: string, stop: TRUE]];
backslash => RETURN [[action: escapeStart, state: string1]];
ENDCASE => RETURN [[action: stringChar, state: string0]];
string1 => SELECT class FROM
newLine => RETURN [[action: skip, state: string0]];
digitOctal => RETURN [[action: escapeChar, state: string2]];
ENDCASE => RETURN [[action: escapeChar, state: string0]];
string2 => SELECT class FROM
closeParen => RETURN [[action: skip, state: string, stop: TRUE]];
digitOctal => RETURN [[action: escapeDigit, state: string3]];
ENDCASE => RETURN [[action: stringChar, state: string0]];
string3 => SELECT class FROM
digitOctal => RETURN [[action: escapeDigit, state: string0]];
ENDCASE => RETURN [[action: stringChar, state: string0]];
hex0 => SELECT class FROM
closeAngle => RETURN [[action: skip, state: hex, stop: TRUE]];
IN HexDigit => RETURN [[action: hexDigit1, state: hex1]];
ENDCASE => RETURN [[action: skip, state: error, stop: TRUE]];
hex1 => SELECT class FROM
closeAngle => RETURN [[action: hexFill, state: hex, stop: TRUE]];
IN HexDigit => RETURN [[action: hexDigit2, state: hex0]];
ENDCASE => RETURN [[action: skip, state: error, stop: TRUE]];
comment => SELECT class FROM
newLine => RETURN [[action: skip, state: empty]];
ENDCASE => RETURN [[action: skip, state: comment]];
ENDCASE => RETURN [[action: skip, state: error, stop: TRUE]];
};
TransitionTable: TYPE ~ ARRAY State OF REF TransitionArray;
TransitionArray: TYPE ~ ARRAY Class OF TransitionResult;
InitTransitionTable: PROC RETURNS [table: REF TransitionTable] ~ {
table ← NEW [TransitionTable];
FOR state: State IN State DO
array: REF TransitionArray ~ NEW [TransitionArray];
FOR class: Class IN Class DO
array[class] ← Transition[state, class];
ENDLOOP;
table[state] ← array;
ENDLOOP;
};
transition: REF TransitionTable ~ InitTransitionTable[];
RClass: TYPE ~ {
digit, -- IN ['0..'9]
sign, -- '+, '-
dot, -- '.
letterE, -- 'E, 'e
other -- everything else
};
RClassArray: TYPE ~ ARRAY CHAR OF RClass;
RClassInit: PROC RETURNS [array: REF RClassArray] ~ {
array ← NEW [RClassArray];
FOR char: CHAR IN CHAR DO
array[char] ← SELECT char FROM
IN ['0..'9] => digit,
'+, '- => sign,
'. => dot,
'E, 'e => letterE,
ENDCASE => other;
ENDLOOP;
};
rclassFromChar: REF RClassArray ~ RClassInit[];
RState: TYPE ~ {
empty, -- beginning
msign, -- + | -
int, -- ( empty | msign | int ) digit integer part
dot, -- ( empty | msign ) .
idot, -- int .
frac, -- ( dot | idot | frac ) digit fraction part
epref, -- ( int | idot | frac ) ( e | E )
esign, -- epref ( + | - )
exp, -- ( epref | esign | exp ) digit exponent part
error
};
RTransition: PROC [state: RState, class: RClass] RETURNS [RState] ~ {
RETURN [SELECT state FROM
empty => SELECT class FROM digit => int, sign => msign, dot => dot,
ENDCASE => error,
msign => SELECT class FROM digit => int, dot => dot,
ENDCASE => error,
int => SELECT class FROM digit => int, dot => idot, letterE => epref,
ENDCASE => error,
dot => SELECT class FROM digit => frac,
ENDCASE => error,
idot, frac => SELECT class FROM digit => frac, letterE => epref,
ENDCASE => error,
epref => SELECT class FROM digit => exp, sign => esign,
ENDCASE => error,
esign => SELECT class FROM digit => exp,
ENDCASE => error,
exp => SELECT class FROM digit => exp,
ENDCASE => error,
ENDCASE => error];
};
RTransitionTable: TYPE ~ ARRAY RState OF REF RTransitionArray;
RTransitionArray: TYPE ~ ARRAY RClass OF RState;
RTransitionInit: PROC RETURNS [table: REF RTransitionTable] ~ {
table ← NEW [RTransitionTable];
FOR state: RState IN RState DO
array: REF RTransitionArray ~ NEW [RTransitionArray];
FOR class: RClass IN RClass DO
array[class] ← RTransition[state, class];
ENDLOOP;
table[state] ← array;
ENDLOOP;
};
rtransition: REF RTransitionTable ~ RTransitionInit[];
TokenInt: PROC [self: Root, text: Text] RETURNS [Any] ~ {
RETURN [AnyFromInt[Convert.IntFromRope[RefText.TrustTextAsRope[text]
! Convert.Error => GOTO Overflow]]];
EXITS Overflow => RETURN [TokenReal[self, text]];
};
TokenReal: PROC [self: Root, text: Text] RETURNS [Any] ~ {
state: RState ← empty;
fr: INT ← 0; -- mantissa
exp, adj: INTEGER ← 0; -- exponent and adjustment
mNeg, eNeg: BOOLFALSE;
mDigits, eDigits: NAT ← 0; -- significant digits
real: REAL;
FOR i: NAT IN[0..text.length) DO
char: CHAR ~ text[i];
state ← rtransition[state][rclassFromChar[char]];
SELECT state FROM
msign => mNeg ← (char='-);
int, frac => { d: [0..10) ~ char-'0;
IF state=frac THEN adj ← adj-1;
IF mDigits=0 AND d=0 THEN NULL -- leading zero
ELSE IF mDigits<9 THEN { fr ← fr*10+d; mDigits ← mDigits+1 }
ELSE { adj ← adj+1; IF mDigits=9 AND d>5 THEN fr ← fr+1; }; -- round if 10th digit >5
};
esign => eNeg ← (char='-);
exp => { d: [0..10) ~ char-'0;
IF eDigits=0 AND d=0 THEN NULL -- leading zero
ELSE IF eDigits<3 THEN { exp ← exp*10+d; eDigits ← eDigits+1 };
};
error => GOTO Fail;
ENDCASE;
ENDLOOP;
SELECT state FROM
int, idot, frac, exp => NULL; -- ok
ENDCASE => GOTO Fail;
IF mNeg THEN fr ← -fr;
IF eNeg THEN exp ← -exp;
real ← Real.PairToReal[fr: fr, exp10: exp+adj ! Real.RealException => RESUME];
RETURN [AnyFromReal[real]];
EXITS Fail => RETURN [TokenName[self, text]];
};
TokenRadix: PROC [self: Root, text: Text] RETURNS [Any] ~ {
state: {radix, val} ← radix;
radix: NAT ← 0;
val, limit: CARD ← 0;
FOR i: NAT IN [0..text.length) DO
char: CHAR ~ text[i];
d: Digit ~ digit[char];
SELECT state FROM
radix => SELECT char FROM
IN ['0..'9] => {
radix ← radix*10+d;
IF radix>36 THEN GOTO Bogus;
};
'# => {
IF radix NOT IN [2..36] THEN GOTO Bogus;
limit ← CARD.LAST/radix;
state ← val;
};
ENDCASE => GOTO Bogus;
val => {
IF NOT d<radix THEN GOTO Bogus;
IF val>limit THEN GOTO Bogus ELSE val ← val*radix;
IF d>(CARD.LAST-val) THEN GOTO Bogus ELSE val ← val+d;
};
ENDCASE => ERROR;
ENDLOOP;
RETURN [AnyFromInt[LOOPHOLE[val]]];
EXITS Bogus => RETURN [TokenName[self, text]];
};
TokenName: PROC [self: Root, text: Text] RETURNS [Any] ~ {
RETURN [AnyFromName[NameFromText[self, text]]];
};
TokenString: PROC [self: Root, text: Text] RETURNS [Any] ~ {
RETURN [AnyFromString[StringCreateFromText[self, text]]];
};
TokenProc: PROC [self: Root] RETURNS [Any] ~ {
size: INT ~ CountToMark[self];
array: Array ~ ArrayCreate[self, size];
AStore[self, array];
PopMark[self];
RETURN [CvX[AnyFromArray[array]]];
};
FileToken: PUBLIC PROC [self: Root, file: File] RETURNS [found: BOOL, token: Any] ~ {
stream: IO.STREAM ~ file.ref.stream;
text: REF TEXT ← self.buffer;
maxLength: NAT ← text.maxLength;
nest: INT ← 0; -- procedure nesting level
DO -- for each token in procedure
state: State ← empty;
slashes: [0..2] ← 0; -- number of slashes preceding name
val: CARDINAL ← 0;
length: NAT ← 0; -- text length
ExpandText: PROC ~ {
text.length ← length; text ← RefText.ReserveChars[text, 1]; maxLength ← text.maxLength;
};
Append: PROC [char: CHAR] ~ INLINE {
IF length=maxLength THEN ExpandText[];
text[length] ← char; length ← length+1;
};
DO -- for each character
char: CHAR ~ IO.GetChar[stream !
IO.EndOfStream => EXIT;
IO.Error => IF ec=StreamClosed THEN EXIT ELSE GOTO IOError;
];
result: TransitionResult ~ transition[state][classFromChar[char]];
state ← result.state;
SELECT result.action FROM
skip, stringStart, escapeStart => NULL;
putBack => IO.Backup[stream, char];
noteSlash => slashes ← slashes+1;
append, stringChar => Append[char];
escapeChar => Append[escape[char]];
escapeDigit => text[length-1] ← VAL[(ORD[text[length-1]]*8+digit[char]) MOD 256];
hexDigit1 => val ← digit[char];
hexDigit2 => Append[VAL[val*16+digit[char]]];
hexFill => Append[VAL[val*16]];
ENDCASE => ERROR Bug;
IF result.stop THEN EXIT;
ENDLOOP;
text.length ← length;
SELECT state FROM
empty, comment => { token ← null; EXIT };
int0, int1 => token ← TokenInt[self, text];
real1, real4 => token ← TokenReal[self, text];
radix1 => token ← TokenRadix[self, text];
sign, real0, real2, real3, radix0, slash, name => token ← TokenName[self, text];
string, hex => token ← TokenString[self, text];
proc0 => { token ← mark; nest ← nest+1 };
proc1 => { IF nest>0 THEN token ← TokenProc[self]; nest ← nest-1 };
ENDCASE => ERROR Error[syntaxerror];
SELECT slashes FROM
1 => token ← CvLit[token]; -- literal name
2 => token ← Load[self, token]; -- immediately evaluated name
ENDCASE;
IF nest>0 THEN PushAny[self, token] ELSE EXIT;
ENDLOOP;
IF nest#0 THEN Error[syntaxerror];
found ← Type[token]#null;
EXITS IOError => ERROR Error[ioerror];
};
StringToken: PUBLIC PROC [self: Root, string: String]
RETURNS
[found: BOOL, token: Any, post: String] ~ {
text: REF TEXTIF self=NIL THEN NIL ELSE self.buffer;
maxLength: NATIF text=NIL THEN 0 ELSE text.maxLength;
nest: INT ← 0; -- procedure nesting level
stringLength: StringIndex ~ StringLength[string];
index: StringIndex ← 0;
DO -- for each token in procedure
state: State ← empty;
slashes: [0..2] ← 0; -- number of slashes preceding name
val: CARDINAL ← 0;
length: NAT ← 0;
start, stop: StringIndex ← 0;
ExpandText: PROC ~ {
IF text=NIL THEN text ← RefText.New[20];
text.length ← length; text ← RefText.ReserveChars[text, 1]; maxLength ← text.maxLength;
};
Append: PROC [char: CHAR] ~ INLINE {
IF length=maxLength THEN ExpandText[];
text[length] ← char; length ← length+1;
};
WHILE index<stringLength DO -- for each character
char: CHAR ~ string.ref[string.val.start+index];
result: TransitionResult ~ transition[state][classFromChar[char]];
index ← index+1;
state ← result.state;
SELECT result.action FROM
skip => NULL;
putBack => index ← index-1;
noteSlash => slashes ← slashes+1;
stringStart => start ← stop ← index;
stringChar => stop ← index;
escapeStart => { stop ← index; state ← string0 }; -- no escape convention in strings
append => Append[char];
hexDigit1 => val ← digit[char];
hexDigit2 => Append[VAL[val*16+digit[char]]];
hexFill => Append[VAL[val*16]];
ENDCASE => ERROR Bug;
IF result.stop THEN EXIT;
ENDLOOP;
text.length ← length;
SELECT state FROM
empty, comment => { token ← null; EXIT };
int0, int1 => token ← TokenInt[self, text];
real1, real4 => token ← TokenReal[self, text];
radix1 => token ← TokenRadix[self, text];
sign, real0, real2, real3, radix0, slash, name => token ← TokenName[self, text];
string => token ← CvLit[AnyFromString[StringGetInterval[string, start, stop-start]]];
hex => token ← TokenString[self, text];
proc0 => { token ← mark; nest ← nest+1 };
proc1 => { IF nest>0 THEN token ← TokenProc[self]; nest ← nest-1 };
ENDCASE => ERROR Error[syntaxerror];
SELECT slashes FROM
1 => token ← CvLit[token]; -- literal name
2 => token ← Load[self, token]; -- immediately evaluated name
ENDCASE;
IF nest>0 AND self#NIL THEN PushAny[self, token] ELSE EXIT;
ENDLOOP;
IF nest#0 THEN Error[syntaxerror];
found ← Type[token]#null;
post ← StringGetInterval[string, index, stringLength-index];
};
END.