SchemeScannerImpl.mesa
Copyright Ó 1989, 1990, 1992 by Xerox Corporation. All rights reserved.
Last changed by Pavel on April 24, 1990 11:48 am PDT
Last tweaked by Mike Spreitzer May 12, 1992 11:37 am PDT
The implementation of the scanner used by the Cedar Scheme reader.
DIRECTORY
Ascii USING [CR, FF, LF, Lower, SP, TAB],
Atom USING [GetPName, MakeAtom, MakeAtomFromChar, MakeAtomFromRefText],
BigCardinals USING [BigAdd, BigCARD, BigFromCard, BigFromSmall, BigZero, BigMultiply, MultiplyByDigit, Zero],
FS USING [StreamOpen],
IO,
RefText USING [AppendChar, ObtainScratch, ReleaseScratch, TrustTextAsRope],
Rope,
Scheme,
SchemeExtras,
SymTab USING [Create, Fetch, Ref, Store];
SchemeScannerImpl: CEDAR PROGRAM
IMPORTS Ascii, Atom, BigCardinals, FS, IO, RefText, Rope, Scheme, SymTab
EXPORTS SchemeExtras
~ BEGIN OPEN Scheme, SchemeExtras;
ROPE: TYPE ~ Rope.ROPE;
Failed: ERROR ~ CODE; -- used in ScanNumber, below.
bigZero: BigCardinals.BigCARD ~ BigCardinals.BigFromSmall[0];
bigOne: BigCardinals.BigCARD ~ BigCardinals.BigFromSmall[1];
MultiplyByCARD: PROC [big: BigCardinals.BigCARD, small: CARD] RETURNS [BigCardinals.BigCARD] ~ {
IF small = 0 OR BigCardinals.BigZero[big] THEN
RETURN [bigZero];
IF small <= CARD16.LAST THEN
RETURN [BigCardinals.MultiplyByDigit[big, small]];
RETURN [BigCardinals.BigMultiply[big, BigCardinals.BigFromCard[small]]];
};
Digits: TYPE ~ RECORD [small: CARD, bigScale: CARD, big: BigCardinals.BigCARD ¬ BigCardinals.Zero]; -- represents small+(bigScale*big)
digitsZero: Digits ~ [small: 0, bigScale: 0, big: bigZero];
digitsOne: Digits ~ [small: 1, bigScale: 0, big: bigZero];
smallLimit: CARD ~ CARD.LAST/16; -- if bigger than this, can't fit another digit
The Scanner
GetToken: PUBLIC PROC [stream: IO.STREAM, buffer: REF TEXT, flushComments: BOOL ¬ TRUE, buildValue: BOOL ¬ FALSE] RETURNS [tokenKind: TokenKind, token: REF TEXT, error: ScanningError ¬ none, value: Any ¬ unspecified] ~ {
char: CHAR ¬ ' ;
NextChar: PROC RETURNS [CHAR] ~ {
token ¬ RefText.AppendChar[token, (char ¬ IO.GetChar[stream])];
char ¬ Ascii.Lower[char];
RETURN [char];
};
NextStringChar: PROC RETURNS [CHAR] ~ {
Like NextChar, but don't lower-case the character.
token ¬ RefText.AppendChar[token, (char ¬ IO.GetChar[stream])];
RETURN [char];
};
NextCharSkip: PROC RETURNS [CHAR] ~ {
No accumulation or lower casing necessary here
char ¬ IO.GetChar[stream];
RETURN [char];
};
PutbackChar: PROC ~ {
IO.Backup[stream, token[token.length ¬ token.length - 1]];
};
BadIdentifier: PROC ~ {
First finish reading the token.
DO
SELECT NextChar[ ! IO.EndOfStream => EXIT] FROM
Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", '; => {
PutbackChar[];
EXIT;
};
ENDCASE => NULL;
ENDLOOP;
tokenKind ¬ error;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ "Illegally-formed identifier";
error ¬ badIdentifier;
};
BadEOF: PROC ~ {
tokenKind ¬ error;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ "Unexpected end of file before end of token";
error ¬ earlyEOF;
};
ScanNumber: PROC ~ {
Some of the number has been read already; those characters are in token. Read the rest of it into token and then analyze it.
terminatedByEOF: BOOL ¬ FALSE;
DO
SELECT NextStringChar[ ! IO.EndOfStream => { terminatedByEOF ¬ TRUE; EXIT}] FROM
Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", '; => {
PutbackChar[];
EXIT;
};
ENDCASE => NULL;
ENDLOOP;
BEGIN
index: NAT ¬ 0;
len: NAT ~ token.length;
char: CHAR ¬ ' ;
radix: CARDINAL ¬ 0; -- unknown
Exactness: TYPE ~ {true, false, unknown};
exact: Exactness ¬ unknown;
Next: PROC RETURNS [CHAR] ~ {
IF index >= len THEN
ERROR IO.EndOfStream[NIL];
char ¬ Ascii.Lower[token[index]];
index ¬ index + 1;
RETURN [char];
};
Peek: PROC RETURNS [CHAR] ~ {
IF index >= len THEN
ERROR IO.EndOfStream[NIL];
RETURN [char ¬ Ascii.Lower[token[index]]];
};
Bump: PROC ~ INLINE {
index ¬ index + 1;
};
Fail: PROC [msg: REF TEXT] ~ {
tokenKind ¬ error;
error ¬ badNumber;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ msg;
ERROR Failed;
};
Exact: PROC [value: Exactness] ~ {
IF exact = unknown THEN
exact ¬ value
ELSE
Fail["Too many exactness specifiers"];
};
Radix: PROC [value: CARDINAL] ~ {
IF radix = 0 THEN
radix ¬ value
ELSE
Fail["Too many radix specifiers"];
};
UReal: PROC [negative: BOOL] RETURNS [uReal: Any ¬ NIL] ~ {
Scan an unsigned real, returning the built value if appropriate.
Which: TYPE ~ {num, denom};
val: ARRAY Which OF Digits ¬ [num: digitsZero, denom: digitsOne];
exponent: INT ¬ 0;
exponentSign: INT ¬ 1;
Fold: PROC [w: Which] ~ {
val[w].big ¬ BigCardinals.BigAdd[
MultiplyByCARD[val[w].big, val[w].bigScale],
BigCardinals.BigFromCard[val[w].small]
];
val[w].small ¬ 0;
val[w].bigScale ¬ 1;
};
GetVal: PROC [w: Which] RETURNS [BigCardinals.BigCARD] ~ INLINE {
Fold[w];
RETURN [val[w].big]
};
AccumDigit: PROC [w: Which, digit: CARDINAL] ~ INLINE { -- uses radix
IF buildValue THEN {
IF val[w].small > smallLimit OR val[w].bigScale > smallLimit THEN
Fold[w];
val[w].small ¬ val[w].small*radix+digit;
IF val[w].bigScale # 0 THEN
val[w].bigScale ¬ val[w].bigScale*radix;
};
};
AccumCheckedDigit: PROC [w: Which, digit: CARDINAL] ~ {
IF digit >= radix THEN
Fail[Rope.ToRefText[IO.PutFR["Illegal digit `%g' in radix %g numbers", [character[char]], [integer[radix]]]]]
ELSE
AccumDigit[w, digit]
};
AccumAfterDotDigit: PROC [digit: CARDINAL] ~ {
AccumDigit[num, digit];
AccumDigit[denom, 0];
};
EnsureDecimal: PROC ~ {
EnsureInexact[];
IF radix # 10 THEN
Fail[Rope.ToRefText[IO.PutFR1["The character `%g' may not appear in a decimal number", [character[char]]]]];
};
EnsureInexact: PROC ~ {
IF exact = true THEN
Fail[Rope.ToRefText[IO.PutFR1["The character `%g' may not appear in an exact number", [character[char]]]]]
ELSE
exact ¬ false;
};
State: TYPE ~ {start, inNum, inNumHashes, initialDot, inDecimal, inDecimalHashes, afterExpMarker, afterExpSign, inExp, afterSlash, inDenom, inDenomHashes};
These states have the semantics portrayed in the following diagram.
state: State ¬ start;
[Artwork node; type 'Artwork on' to command tool]
BEGIN -- for exits
ENABLE
IO.EndOfStream =>
SELECT state FROM
start, initialDot, afterSlash, afterExpMarker =>
GO TO BadEOFReturn;
ENDCASE =>
GO TO OKReturn;
SELECT Next[] FROM
IN ['0..'9] => {
AccumCheckedDigit[num, char - '0];
state ¬ inNum;
};
IN ['a..'f] => {
AccumCheckedDigit[num, char - 'a + 10];
state ¬ inNum;
};
'. => {
EnsureDecimal[];
state ¬ initialDot;
};
ENDCASE =>
GO TO BadCharReturn;
DO -- finite state machine main loop
SELECT state FROM
inNum =>
SELECT Peek[] FROM
IN ['0..'9] => {
Bump[];
AccumCheckedDigit[num, char - '0];
};
IN ['a..'c] => {
Bump[];
AccumCheckedDigit[num, char - 'a + 10];
};
'd, 'e, 'f => {
Bump[];
IF radix = 16 THEN
AccumDigit[num, char - 'a + 10]
ELSE
state ¬ afterExpMarker;
};
's, 'l => {
Bump[];
state ¬ afterExpMarker;
};
'. => {
Bump[];
EnsureDecimal[];
state ¬ inDecimal;
};
'/ => {
Bump[];
state ¬ afterSlash;
};
'# => {
Bump[];
EnsureInexact[];
AccumDigit[num, 0];
state ¬ inNumHashes;
};
ENDCASE =>
GO TO OKReturn;
inNumHashes =>
SELECT Peek[] FROM
'# => {
Bump[];
AccumDigit[num, 0];
};
'/ => {
Bump[];
state ¬ afterSlash;
};
'. => {
Bump[];
EnsureDecimal[];
state ¬ inDecimalHashes;
};
'e, 's, 'f, 'd, 'l => {
Bump[];
state ¬ afterExpMarker;
};
ENDCASE =>
GO TO OKReturn;
initialDot =>
IF Next[] IN ['0..'9] THEN {
AccumAfterDotDigit[char - '0];
state ¬ inDecimal;
}
ELSE
GO TO BadCharReturn;
inDecimal =>
SELECT Peek[] FROM
IN ['0..'9] => {
Bump[];
AccumAfterDotDigit[char - '0];
};
'e, 's, 'f, 'd, 'l => {
Bump[];
state ¬ afterExpMarker;
};
'# => {
Bump[];
AccumAfterDotDigit[0];
state ¬ inDecimalHashes;
};
ENDCASE =>
GO TO OKReturn;
inDecimalHashes =>
SELECT Peek[] FROM
'# => {
Bump[];
AccumAfterDotDigit[0]; - unnecessary
};
'e, 's, 'f, 'd, 'l => {
Bump[];
state ¬ afterExpMarker;
};
ENDCASE =>
GO TO OKReturn;
afterExpMarker => {
EnsureDecimal[];
SELECT Next[] FROM
'+ => state ¬ afterExpSign;
'- => {
exponentSign ¬ -1;
state ¬ afterExpSign;
};
IN ['0..'9] => {
exponent ¬ char - '0;
state ¬ inExp;
};
ENDCASE =>
GO TO BadCharReturn;
};
afterExpSign =>
IF Next[] IN ['0..'9] THEN {
exponent ¬ exponent * 10 + char - '0;
state ¬ inExp;
}
ELSE
GO TO BadCharReturn;
inExp =>
IF Peek[] IN ['0..'9] THEN {
Bump[];
exponent ¬ exponent * 10 + char - '0;
}
ELSE
GO TO OKReturn;
afterSlash => {
val[denom] ¬ digitsZero;
SELECT Next[] FROM
IN ['0..'9] => {
AccumCheckedDigit[denom, char - '0];
state ¬ inDenom;
};
IN ['a..'f] => {
AccumCheckedDigit[denom, char - 'a + 10];
state ¬ inDenom;
};
ENDCASE =>
GO TO BadCharReturn;
};
inDenom =>
SELECT Peek[] FROM
IN ['0..'9] => {
Bump[];
AccumCheckedDigit[denom, char - '0];
};
IN ['a..'f] => {
Bump[];
AccumCheckedDigit[denom, char - 'a + 10];
};
'# => {
Bump[];
EnsureInexact[];
AccumDigit[denom, 0];
state ¬ inDenomHashes;
};
ENDCASE =>
GO TO OKReturn;
inDenomHashes =>
IF Peek[] = '# THEN {
Bump[];
AccumDigit[denom, 0];
}
ELSE
GO TO OKReturn;
ENDCASE => ERROR;
ENDLOOP;
EXITS
BadCharReturn =>
Fail[Rope.ToRefText[IO.PutFR1["Unexpected character `%g' in number", [character[char]]]]];
BadEOFReturn => {
IF terminatedByEOF THEN
BadEOF[]
ELSE {
tokenKind ¬ error;
error ¬ badNumber;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ "Numeric token ended prematurely";
};
ERROR Failed;
};
OKReturn => {
IF buildValue THEN {
IF exact # false AND exponent = 0 AND val[num].bigScale = 0 AND val[denom].bigScale = 0 AND val[denom].small = 1 AND val[num].small <= CARD[INT.LAST] THEN {
SPECIAL CASE - Small exact integer
uReal ¬
MakeFixnum[
IF negative THEN
-INT[val[num].small]
ELSE
INT[val[num].small]
];
}
ELSE IF exact = false AND exponent = 0 AND val[num].bigScale = 0 AND val[denom].bigScale = 0 AND val[num].small <= 1000000 AND val[denom].small <= 1000000 THEN {
SPECIAL CASE - Small inexact real; can generate flonum with only one roundoff
float: REAL ¬ REAL[INT[val[num].small]] / REAL[INT[val[denom].small]];
IF negative THEN
float ¬ -float;
uReal ¬ NEW[NumberRep.flonum ¬ [FALSE, flonum[float]]];
}
ELSE
uReal ¬
MakeReal[
negative: negative,
numerator: GetVal[num],
denominator: GetVal[denom],
exponent: exponent * exponentSign,
radix: 10, -- just for the exponent now
exact: exact # false];
};
RETURN;
};
END;
};
MakeInteger: PROC [i: INT] RETURNS [Any] ~ {
Like Scheme.MakeFixnum, but pay attention to the setting of exact.
IF exact # false THEN
RETURN [MakeFixnum[i]]
ELSE
RETURN [NEW[NumberRep.flonum ¬ [FALSE, flonum[REAL[i]]]]];
};
tokenKind ¬ number;
BEGIN -- for exits
ENABLE {
Failed => GO TO Return;
IO.EndOfStream => GO TO BadEOFReturn;
};
Get the prefix.
WHILE Peek[] = '# DO
Bump[];
SELECT Next[] FROM
'b => Radix[2];
'd => Radix[8];
'o => Radix[10];
'x => Radix[16];
'w => Radix[16];
'e => Exact[true];
'i => Exact[false];
ENDCASE => Fail["Illegal #-specifier in number"];
ENDLOOP;
IF radix = 0 THEN radix ¬ 10;
Get the first possibly-signed real and check for the `+i', `-i', `+Ui', and `-Ui' cases.
SELECT Peek[] FROM
'+, '- => {
negative: BOOL ~ (char = '-);
Bump[];
IF Peek[] = 'i THEN {
Bump[];
IF buildValue THEN
value ¬ MakeRectangular[MakeInteger[0], MakeInteger[IF negative THEN -1 ELSE 1]];
GO TO EndCheck;
};
value ¬ UReal[negative];
IF Peek[! IO.EndOfStream => GO TO Return] = 'i THEN {
Bump[];
IF buildValue THEN
value ¬ MakeRectangular[MakeInteger[0], value];
GO TO EndCheck;
};
};
ENDCASE =>
value ¬ UReal[FALSE];
Check for other complex number cases.
SELECT Peek[! IO.EndOfStream => GO TO Return] FROM
'@ => {
magnitude: Any ¬ value;
Bump[];
SELECT Peek[] FROM
'+, '- => {
Bump[];
value ¬ UReal[char = '-];
};
ENDCASE =>
value ¬ UReal[FALSE];
IF buildValue THEN
value ¬ MakePolar[magnitude, value];
GO TO EndCheck;
};
'+, '- => {
negative: BOOL ~ (char = '-);
realPart: Any ¬ value;
Bump[];
IF Peek[] = 'i THEN { -- this is the `R+i' or `R-i' case
Bump[];
IF buildValue THEN
value ¬ MakeRectangular[value, MakeInteger[IF negative THEN -1 ELSE 1]];
GO TO EndCheck;
};
value ¬ UReal[negative];
IF Next[] # 'i THEN
Fail["Complex number does not end in `i'"];
IF buildValue THEN
value ¬ MakeRectangular[realPart, value];
GO TO EndCheck;
};
ENDCASE => GO TO EndCheck;
ERROR; -- we should never get this far; every case should be taking one of the exits
EXITS
Return =>
RETURN;
EndCheck => {
IF index # len THEN {
tokenKind ¬ error;
error ¬ badNumber;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ "Numeric token contains extra characters";
};
RETURN;
};
BadEOFReturn => {
IF terminatedByEOF THEN
BadEOF[]
ELSE {
tokenKind ¬ error;
error ¬ badNumber;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ "Numeric token ended prematurely";
};
RETURN;
};
END;
END;
};
ScanIdentifier: PROC [allowTail: BOOL] ~ {
SELECT NextChar[ ! IO.EndOfStream => GO TO BadEOFReturn] FROM
'# => {
IF NextChar[ ! IO.EndOfStream => GO TO BadEOFReturn] # '" THEN {
PutbackChar[];
BadIdentifier[];
RETURN;
}
ELSE {
ScanString[identifier];
RETURN;
};
};
'+, '-, '., IN ['0..'9] => {
Identifiers can't start with these, except for the `peculiar' ones "+", "-", and "...", which are handled elsewhere.
BadIdentifier[];
RETURN;
};
ENDCASE => {
IF NOT identifierConstituent[char] THEN {
PutbackChar[];
BadIdentifier[];
RETURN;
}
ELSE {
buffer: REF TEXT ¬ IF buildValue THEN RefText.ObtainScratch[100] ELSE NIL;
AddChar: PROC ~ {
IF buildValue THEN
buffer ¬ RefText.AppendChar[buffer, char];
};
AddChar[];
DO
SELECT NextChar[ ! IO.EndOfStream => EXIT] FROM
Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", '; => {
PutbackChar[];
EXIT;
};
'# => {
IF allowTail THEN {
PutbackChar[];
EXIT;
}
ELSE {
BadIdentifier[];
RETURN;
}
};
ENDCASE =>
IF identifierConstituent[char] THEN
AddChar[]
ELSE {
BadIdentifier[];
RETURN;
};
ENDLOOP;
tokenKind ¬ identifier;
IF buildValue THEN {
value ¬ Atom.MakeAtomFromRefText[buffer];
RefText.ReleaseScratch[buffer];
};
};
};
EXITS
BadEOFReturn => {
BadEOF[];
RETURN;
};
};
ScanIdentifierTail: PROC ~ {
tokenKind is identifier, and, if buildValue, value is the appropriate symbol. Check for this symbol being the first part of a structured name.
IF IO.PeekChar[stream ! IO.EndOfStream => GO TO Return] = '# THEN {
interfaceName: Any ~ value;
[] ¬ NextChar[]; -- skip the hash mark
ScanIdentifier[FALSE];
IF tokenKind = identifier THEN { -- as opposed to `error'
tokenKind ¬ moduleReference;
IF buildValue THEN
value ¬ Cons[$access, Cons[interfaceName, Cons[value, NIL]]];
};
};
EXITS
Return => RETURN;
};
ScanString: PROC [nonErrorKind: TokenKind] ~ {
buffer: REF TEXT ¬ IF buildValue THEN RefText.ObtainScratch[100] ELSE NIL;
AddChar: PROC [c: CHAR] ~ {
IF buildValue THEN
buffer ¬ RefText.AppendChar[buffer, c];
};
tokenKind ¬ nonErrorKind;
DO
SELECT NextStringChar[ ! IO.EndOfStream => EXIT] FROM
'" => {
IF buildValue THEN {
SELECT nonErrorKind FROM
string => value ¬ StringFromRope[Rope.FromRefText[buffer]];
identifier => value ¬ Atom.MakeAtomFromRefText[buffer];
ENDCASE => ERROR;
RefText.ReleaseScratch[buffer];
};
RETURN;
};
'\\ => {
SELECT NextChar[ ! IO.EndOfStream => EXIT] FROM
'b => AddChar['\b];
'f, 'p => AddChar['\f];
'l => AddChar['\l];
'n => AddChar['\n];
'r => AddChar['\r];
't => AddChar['\t];
'", '\\ => AddChar[char];
IN ['0..'3] => {
ascii: [0 .. 377B] ¬ ORD[char] - ORD['0];
FOR i: INT IN [3..4] DO
SELECT NextChar[ ! IO.EndOfStream => EXIT] FROM
IN ['0..'7] => ascii ¬ ascii * 10B + ORD[char] - ORD['0];
ENDCASE => {
tokenKind ¬ error;
error ¬ unknownStringEscape;
value ¬ StringFromRope[Rope.FromRefText[s: token, start: token.length - i, len: i]];
token ¬ "Unknown escape sequence in string";
RETURN;
};
ENDLOOP;
AddChar[VAL[ascii]];
};
ENDCASE => {
tokenKind ¬ error;
error ¬ unknownStringEscape;
value ¬ StringFromRope[Rope.FromRefText[s: token, start: token.length - 2, len: 2]];
token ¬ "Unknown escape sequence in string";
RETURN;
};
};
ENDCASE => AddChar[char];
ENDLOOP;
We can only get here on EOF.
BadEOF[];
RETURN;
};
token ¬ buffer;
BEGIN-- for exits
DO-- until we get a real token
token.length ¬ 0;
SELECT NextChar[ ! IO.EndOfStream => GO TO EOFReturn] FROM
Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF => LOOP;
'; => {
tokenKind ¬ comment;
IF flushComments THEN {
DO
SELECT NextCharSkip[ ! IO.EndOfStream => GO TO EOFReturn] FROM
Ascii.CR, Ascii.LF => EXIT;
ENDCASE;
ENDLOOP;
}
ELSE
DO
SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM
Ascii.CR, Ascii.LF => GO TO PutbackReturn;
Don't include the CR in the comment
ENDCASE;
ENDLOOP;
};
'( => {
tokenKind ¬ openParenthesis;
RETURN};
') => {
tokenKind ¬ closeParenthesis;
RETURN};
'' => {
tokenKind ¬ quote;
RETURN};
'` => {
tokenKind ¬ quasiquote;
RETURN};
', => {
tokenKind ¬ unquote;
IF NextChar[ ! IO.EndOfStream => GO TO Return] = '@ THEN {
tokenKind ¬ unquoteSplicing;
RETURN;
}
ELSE
GO TO PutbackReturn;
};
IN ['0 .. '9] => {
ScanNumber[];
RETURN};
'. => { -- either dot or the peculiar identifier "..." or a number.
tokenKind ¬ dot;
SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM
Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", '; =>
GO TO PutbackReturn;
IN ['0 .. '9] => {
ScanNumber[];
RETURN};
'. => {
SELECT NextChar[ ! IO.EndOfStream => GO TO BadEOFReturn] FROM
Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", ' => {
PutbackChar[];
BadIdentifier[];
RETURN};
'. => NULL;
ENDCASE => {
BadIdentifier[];
RETURN};
tokenKind ¬ identifier;
IF buildValue THEN
value ¬ Atom.MakeAtom["..."];
Check that the token is properly delimited.
SELECT IO.PeekChar[stream ! IO.EndOfStream => GO TO Return] FROM
Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", '; =>
RETURN;
'# => {
ScanIdentifierTail[];
RETURN};
ENDCASE => {
BadIdentifier[];
RETURN};
};
ENDCASE => {
BadIdentifier[];
RETURN};
};
'+, '- => { -- either a peculiar identifier ("+" or "-") or a number.
tokenKind ¬ identifier;
IF buildValue THEN
value ¬ Atom.MakeAtomFromChar[char];
SELECT IO.PeekChar[stream ! IO.EndOfStream => GO TO Return] FROM
Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", '; =>
RETURN;
'# => {
ScanIdentifierTail[];
RETURN};
ENDCASE => {
ScanNumber[];
RETURN};
};
'#=> {
SELECT NextChar[ ! IO.EndOfStream => GO TO BadEOFReturn] FROM
't, 'f => {
tokenKind ¬ boolean;
IF buildValue THEN
value ¬ IF char = 't THEN true ELSE false;
RETURN};
'b, 'o, 'd, 'x, 'w, 'i, 'e => {
ScanNumber[];
RETURN};
'\\ => {
found: BOOL;
buffer: REF TEXT ¬ RefText.ObtainScratch[10];
DO
SELECT NextStringChar[ ! IO.EndOfStream => IF buffer.length = 0 THEN GO TO BadEOFReturn ELSE EXIT] FROM
Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", '; => {
IF buffer.length = 0 THEN
buffer ¬ RefText.AppendChar[buffer, char]
ELSE
PutbackChar[];
EXIT;
};
ENDCASE => buffer ¬ RefText.AppendChar[buffer, char];
ENDLOOP;
tokenKind ¬ character;
We must treat one-character names specially because the SymTab below is case-insensitive.
IF buffer.length = 1 THEN {
IF buildValue THEN
value ¬ MakeChar[buffer[0]];
RETURN;
};
[found, value] ¬ SymTab.Fetch[x: charForName, key: RefText.TrustTextAsRope[buffer]];
RefText.ReleaseScratch[buffer];
IF NOT found THEN {
tokenKind ¬ error;
error ¬ unknownCharacterName;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ "Unknown named character"
};
RETURN};
'( => {
tokenKind ¬ openVector;
RETURN};
'" => {
ScanString[identifier];
IF tokenKind = identifier THEN -- as opposed to `error'
ScanIdentifierTail[];
RETURN};
'! => {
found: BOOL;
buffer: REF TEXT ¬ RefText.ObtainScratch[10];
DO
SELECT NextStringChar[ ! IO.EndOfStream => IF buffer.length = 0 THEN GO TO BadEOFReturn ELSE EXIT] FROM
Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", '; => {
IF buffer.length = 0 THEN
buffer ¬ RefText.AppendChar[buffer, char]
ELSE
PutbackChar[];
EXIT;
};
ENDCASE => buffer ¬ RefText.AppendChar[buffer, char];
ENDLOOP;
tokenKind ¬ character;
[found, value] ¬ SymTab.Fetch[x: primitiveSyntaxForName, key: RefText.TrustTextAsRope[buffer]];
RefText.ReleaseScratch[buffer];
IF NOT found THEN {
tokenKind ¬ error;
error ¬ unknownPrimitiveSyntax;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ "Unknown primitive syntax marker"
};
RETURN};
ENDCASE => {
tokenKind ¬ error;
error ¬ unknownHashDispatch;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ "Unknown # token";
RETURN};
};
'"=> {
ScanString[string];
RETURN};
ENDCASE => {
PutbackChar[];
ScanIdentifier[TRUE];
IF tokenKind = identifier THEN
ScanIdentifierTail[];
RETURN};
ENDLOOP;
EXITS
Return => { RETURN };
PutbackReturn => {
PutbackChar[];
RETURN};
BadEOFReturn => {
BadEOF[];
RETURN};
EOFReturn => {
tokenKind ¬ endOfFile;
RETURN};
END;
};
primitiveSyntaxForName: SymTab.Ref ~ InitPrimitiveSyntax[];
InitPrimitiveSyntax: PROC RETURNS [table: SymTab.Ref] ~ {
table ¬ SymTab.Create[case: FALSE];
FOR ps: PrimitiveSyntaxRep IN PrimitiveSyntaxRep DO
[] ¬ SymTab.Store[x: table, key: Atom.GetPName[symbolForPrimitiveSyntaxRep[ps]], val: primitiveSyntaxForPrimitiveSyntaxRep[ps]];
ENDLOOP;
};
charForName: SymTab.Ref ~ SymTab.Create[case: FALSE];
nameForChar: REF ARRAY CHAR OF ROPE ~ InitCharNames[];
InitCharNames: PROC RETURNS [REF ARRAY CHAR OF ROPE] ~ {
a: REF ARRAY CHAR OF ROPE ¬ NEW[ARRAY CHAR OF ROPE];
AddName: PROC [char: CHAR, name: ROPE] ~ {
[] ¬ SymTab.Store[x: charForName, key: name, val: MakeChar[char]];
a[char] ¬ name;
};
Names are added in increasing order of preference on output. That is, the last name assigned to each character is the one to be used on output.
FOR c: CHAR IN CHAR DO
AddName[c, Rope.Flatten[IO.PutFR1["%03b", [integer[ORD[c]]]]]];
ENDLOOP;
AddName['(, Rope.Flatten["OpenPar"]];
AddName['(, Rope.Flatten["OpenParen"]];
AddName['), Rope.Flatten["ClosePar"]];
AddName['), Rope.Flatten["CloseParen"]];
FOR c: CHAR IN (' ..'~] DO
AddName[c, Rope.FromChar[c]];
ENDLOOP;
AddName['\r, Rope.Flatten["Return"]];
AddName['\l, Rope.Flatten["LineFeed"]];
AddName['\n, Rope.Flatten["Newline"]];
AddName['\t, Rope.Flatten["Tab"]];
AddName[' , Rope.Flatten["Space"]];
AddName['\f, Rope.Flatten["FormFeed"]];
AddName['\f, Rope.Flatten["Page"]];
RETURN [a]
};
identifierConstituent: REF ARRAY CHAR OF BOOL ~ InitIdentifierConstituent[];
InitIdentifierConstituent: PROC RETURNS [a: REF ARRAY CHAR OF BOOL] ~ {
a ¬ NEW[ARRAY CHAR OF BOOL ¬ ALL[FALSE]];
FOR c: CHAR IN ['0..'9] DO
a[c] ¬ TRUE;
ENDLOOP;
FOR c: CHAR IN ['A..'Z] DO
a[c] ¬ TRUE;
ENDLOOP;
FOR c: CHAR IN ['a..'z] DO
a[c] ¬ TRUE;
ENDLOOP;
a['!] ¬ TRUE;
a['$] ¬ TRUE;
a['%] ¬ TRUE;
a['&] ¬ TRUE;
a['*] ¬ TRUE;
a['/] ¬ TRUE;
a[':] ¬ TRUE;
a['<] ¬ TRUE;
a['=] ¬ TRUE;
a['>] ¬ TRUE;
a['?] ¬ TRUE;
a['~] ¬ TRUE;
a['←] ¬ TRUE;
a['^] ¬ TRUE;
a['.] ¬ TRUE;
a['+] ¬ TRUE;
a['-] ¬ TRUE;
};
Testing
Test: PROC [name: ROPE, out: IO.STREAM] ~ {
in: IO.STREAM ~ FS.StreamOpen[name];
buffer: REF TEXT ¬ NEW[TEXT[10]];
token: REF TEXT ¬ NEW[TEXT[10]];
tokenKind: TokenKind;
error: ScanningError;
value: Any;
DO
[tokenKind, token, error, value] ¬ GetToken[in, buffer, FALSE, TRUE];
SELECT tokenKind FROM
identifier, moduleReference, boolean, number, character, string, primitiveSyntax =>
IO.PutF[out, "%g: %g\n", [refAny[NEW[TokenKind ¬ tokenKind]]], [refAny[value]]];
endOfFile => EXIT;
error =>
IO.PutF[out, "**Error (%g)** %g\n\t%g\n", [refAny[NEW[ScanningError ¬ error]]], [text[token]], [refAny[value]]];
comment => IO.PutF1[out, "%g\n", [text[token]]];
ENDCASE =>
IO.PutF[out, "%g: %g\n", [refAny[NEW[TokenKind ¬ tokenKind]]], [text[token]]];
ENDLOOP;
IO.Close[in];
};
END.