SchemeScannerImpl.mesa
Copyright Ó 1989, 1990, 1992 by Xerox Corporation. All rights reserved.
Last changed by Pavel on April 24, 1990 11:48 am PDT
Last tweaked by Mike Spreitzer May 12, 1992 11:37 am PDT
The implementation of the scanner used by the Cedar Scheme reader.
DIRECTORY
Ascii USING [CR, FF, LF, Lower, SP, TAB],
Atom USING [GetPName, MakeAtom, MakeAtomFromChar, MakeAtomFromRefText],
BigCardinals USING [BigAdd, BigCARD, BigFromCard, BigFromSmall, BigZero, BigMultiply, MultiplyByDigit, Zero],
FS USING [StreamOpen],
IO,
RefText USING [AppendChar, ObtainScratch, ReleaseScratch, TrustTextAsRope],
Rope,
Scheme,
SchemeExtras,
SymTab USING [Create, Fetch, Ref, Store];
SchemeScannerImpl: CEDAR PROGRAM
IMPORTS Ascii, Atom, BigCardinals, FS, IO, RefText, Rope, Scheme, SymTab
EXPORTS SchemeExtras
~
BEGIN
OPEN Scheme, SchemeExtras;
ROPE: TYPE ~ Rope.ROPE;
Failed: ERROR ~ CODE; -- used in ScanNumber, below.
bigZero: BigCardinals.BigCARD ~ BigCardinals.BigFromSmall[0];
bigOne: BigCardinals.BigCARD ~ BigCardinals.BigFromSmall[1];
MultiplyByCARD:
PROC [big: BigCardinals.BigCARD, small:
CARD]
RETURNS [BigCardinals.BigCARD] ~ {
IF small = 0
OR BigCardinals.BigZero[big]
THEN
RETURN [bigZero];
IF small <=
CARD16.
LAST
THEN
RETURN [BigCardinals.MultiplyByDigit[big, small]];
RETURN [BigCardinals.BigMultiply[big, BigCardinals.BigFromCard[small]]];
};
Digits: TYPE ~ RECORD [small: CARD, bigScale: CARD, big: BigCardinals.BigCARD ¬ BigCardinals.Zero]; -- represents small+(bigScale*big)
digitsZero: Digits ~ [small: 0, bigScale: 0, big: bigZero];
digitsOne: Digits ~ [small: 1, bigScale: 0, big: bigZero];
smallLimit: CARD ~ CARD.LAST/16; -- if bigger than this, can't fit another digit
The Scanner
GetToken:
PUBLIC
PROC [stream:
IO.
STREAM, buffer:
REF
TEXT, flushComments:
BOOL ¬
TRUE, buildValue:
BOOL ¬
FALSE]
RETURNS [tokenKind: TokenKind, token:
REF
TEXT, error: ScanningError ¬ none, value: Any ¬ unspecified] ~ {
char: CHAR ¬ ' ;
NextChar:
PROC
RETURNS [
CHAR] ~ {
token ¬ RefText.AppendChar[token, (char ¬ IO.GetChar[stream])];
char ¬ Ascii.Lower[char];
RETURN [char];
};
NextStringChar:
PROC
RETURNS [
CHAR] ~ {
Like NextChar, but don't lower-case the character.
token ¬ RefText.AppendChar[token, (char ¬ IO.GetChar[stream])];
RETURN [char];
};
NextCharSkip:
PROC
RETURNS [
CHAR] ~ {
No accumulation or lower casing necessary here
char ¬ IO.GetChar[stream];
RETURN [char];
};
PutbackChar:
PROC ~ {
IO.Backup[stream, token[token.length ¬ token.length - 1]];
};
BadIdentifier:
PROC ~ {
First finish reading the token.
DO
SELECT NextChar[ !
IO.EndOfStream =>
EXIT]
FROM
Ascii.
SP, Ascii.
TAB, Ascii.
CR, Ascii.
LF, Ascii.
FF, '(, '), '", '; => {
PutbackChar[];
EXIT;
};
ENDCASE => NULL;
ENDLOOP;
tokenKind ¬ error;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ "Illegally-formed identifier";
error ¬ badIdentifier;
};
BadEOF:
PROC ~ {
tokenKind ¬ error;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ "Unexpected end of file before end of token";
error ¬ earlyEOF;
};
ScanNumber:
PROC ~ {
Some of the number has been read already; those characters are in token. Read the rest of it into token and then analyze it.
terminatedByEOF: BOOL ¬ FALSE;
DO
SELECT NextStringChar[ !
IO.EndOfStream => { terminatedByEOF ¬
TRUE;
EXIT}]
FROM
Ascii.
SP, Ascii.
TAB, Ascii.
CR, Ascii.
LF, Ascii.
FF, '(, '), '", '; => {
PutbackChar[];
EXIT;
};
ENDCASE => NULL;
ENDLOOP;
BEGIN
index: NAT ¬ 0;
len: NAT ~ token.length;
char: CHAR ¬ ' ;
radix: CARDINAL ¬ 0; -- unknown
Exactness: TYPE ~ {true, false, unknown};
exact: Exactness ¬ unknown;
Next:
PROC
RETURNS [
CHAR] ~ {
IF index >= len
THEN
ERROR IO.EndOfStream[NIL];
char ¬ Ascii.Lower[token[index]];
index ¬ index + 1;
RETURN [char];
};
Peek:
PROC
RETURNS [
CHAR] ~ {
IF index >= len
THEN
ERROR IO.EndOfStream[NIL];
RETURN [char ¬ Ascii.Lower[token[index]]];
};
Bump:
PROC ~
INLINE {
index ¬ index + 1;
};
Fail:
PROC [msg:
REF
TEXT] ~ {
tokenKind ¬ error;
error ¬ badNumber;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ msg;
ERROR Failed;
};
Exact:
PROC [value: Exactness] ~ {
IF exact = unknown
THEN
exact ¬ value
ELSE
Fail["Too many exactness specifiers"];
};
Radix:
PROC [value:
CARDINAL] ~ {
IF radix = 0
THEN
radix ¬ value
ELSE
Fail["Too many radix specifiers"];
};
UReal:
PROC [negative:
BOOL]
RETURNS [uReal: Any ¬
NIL] ~ {
Scan an unsigned real, returning the built value if appropriate.
Which: TYPE ~ {num, denom};
val: ARRAY Which OF Digits ¬ [num: digitsZero, denom: digitsOne];
exponent: INT ¬ 0;
exponentSign: INT ¬ 1;
Fold:
PROC [w: Which] ~ {
val[w].big ¬ BigCardinals.BigAdd[
MultiplyByCARD[val[w].big, val[w].bigScale],
BigCardinals.BigFromCard[val[w].small]
];
val[w].small ¬ 0;
val[w].bigScale ¬ 1;
};
GetVal:
PROC [w: Which]
RETURNS [BigCardinals.BigCARD] ~
INLINE {
Fold[w];
RETURN [val[w].big]
};
AccumDigit:
PROC [w: Which, digit:
CARDINAL] ~
INLINE {
-- uses radix
IF buildValue
THEN {
IF val[w].small > smallLimit
OR val[w].bigScale > smallLimit
THEN
Fold[w];
val[w].small ¬ val[w].small*radix+digit;
IF val[w].bigScale # 0
THEN
val[w].bigScale ¬ val[w].bigScale*radix;
};
};
AccumCheckedDigit:
PROC [w: Which, digit:
CARDINAL] ~ {
IF digit >= radix
THEN
Fail[Rope.ToRefText[IO.PutFR["Illegal digit `%g' in radix %g numbers", [character[char]], [integer[radix]]]]]
ELSE
AccumDigit[w, digit]
};
AccumAfterDotDigit:
PROC [digit:
CARDINAL] ~ {
AccumDigit[num, digit];
AccumDigit[denom, 0];
};
EnsureDecimal:
PROC ~ {
EnsureInexact[];
IF radix # 10
THEN
Fail[Rope.ToRefText[IO.PutFR1["The character `%g' may not appear in a decimal number", [character[char]]]]];
};
EnsureInexact:
PROC ~ {
IF exact = true
THEN
Fail[Rope.ToRefText[IO.PutFR1["The character `%g' may not appear in an exact number", [character[char]]]]]
};
State:
TYPE ~ {start, inNum, inNumHashes, initialDot, inDecimal, inDecimalHashes, afterExpMarker, afterExpSign, inExp, afterSlash, inDenom, inDenomHashes};
These states have the semantics portrayed in the following diagram.
state: State ¬ start;
[Artwork node; type 'Artwork on' to command tool]
BEGIN
-- for exits
ENABLE
IO.EndOfStream =>
SELECT state
FROM
start, initialDot, afterSlash, afterExpMarker =>
GO TO BadEOFReturn;
ENDCASE =>
GO TO OKReturn;
SELECT Next[]
FROM
IN ['0..'9] => {
AccumCheckedDigit[num, char - '0];
state ¬ inNum;
};
IN ['a..'f] => {
AccumCheckedDigit[num, char - 'a + 10];
state ¬ inNum;
};
'. => {
EnsureDecimal[];
state ¬ initialDot;
};
ENDCASE =>
GO TO BadCharReturn;
DO
-- finite state machine main loop
SELECT state
FROM
inNum =>
SELECT Peek[]
FROM
IN ['0..'9] => {
Bump[];
AccumCheckedDigit[num, char - '0];
};
IN ['a..'c] => {
Bump[];
AccumCheckedDigit[num, char - 'a + 10];
};
'd, 'e, 'f => {
Bump[];
IF radix = 16
THEN
AccumDigit[num, char - 'a + 10]
ELSE
state ¬ afterExpMarker;
};
's, 'l => {
Bump[];
state ¬ afterExpMarker;
};
'. => {
Bump[];
EnsureDecimal[];
state ¬ inDecimal;
};
'/ => {
Bump[];
state ¬ afterSlash;
};
'# => {
Bump[];
EnsureInexact[];
AccumDigit[num, 0];
state ¬ inNumHashes;
};
ENDCASE =>
GO TO OKReturn;
inNumHashes =>
SELECT Peek[]
FROM
'# => {
Bump[];
AccumDigit[num, 0];
};
'/ => {
Bump[];
state ¬ afterSlash;
};
'. => {
Bump[];
EnsureDecimal[];
state ¬ inDecimalHashes;
};
'e, 's, 'f, 'd, 'l => {
Bump[];
state ¬ afterExpMarker;
};
ENDCASE =>
GO TO OKReturn;
initialDot =>
IF Next[]
IN ['0..'9]
THEN {
AccumAfterDotDigit[char - '0];
state ¬ inDecimal;
}
ELSE
GO TO BadCharReturn;
inDecimal =>
SELECT Peek[]
FROM
IN ['0..'9] => {
Bump[];
AccumAfterDotDigit[char - '0];
};
'e, 's, 'f, 'd, 'l => {
Bump[];
state ¬ afterExpMarker;
};
'# => {
Bump[];
AccumAfterDotDigit[0];
state ¬ inDecimalHashes;
};
ENDCASE =>
GO TO OKReturn;
inDecimalHashes =>
SELECT Peek[]
FROM
'# => {
Bump[];
AccumAfterDotDigit[0]; - unnecessary
};
'e, 's, 'f, 'd, 'l => {
Bump[];
state ¬ afterExpMarker;
};
ENDCASE =>
GO TO OKReturn;
afterExpMarker => {
EnsureDecimal[];
SELECT Next[]
FROM
'+ => state ¬ afterExpSign;
'- => {
exponentSign ¬ -1;
state ¬ afterExpSign;
};
IN ['0..'9] => {
exponent ¬ char - '0;
state ¬ inExp;
};
ENDCASE =>
GO TO BadCharReturn;
};
afterExpSign =>
IF Next[]
IN ['0..'9]
THEN {
exponent ¬ exponent * 10 + char - '0;
state ¬ inExp;
}
ELSE
GO TO BadCharReturn;
inExp =>
IF Peek[]
IN ['0..'9]
THEN {
Bump[];
exponent ¬ exponent * 10 + char - '0;
}
afterSlash => {
val[denom] ¬ digitsZero;
SELECT Next[]
FROM
IN ['0..'9] => {
AccumCheckedDigit[denom, char - '0];
state ¬ inDenom;
};
IN ['a..'f] => {
AccumCheckedDigit[denom, char - 'a + 10];
state ¬ inDenom;
};
ENDCASE =>
GO TO BadCharReturn;
};
inDenom =>
SELECT Peek[]
FROM
IN ['0..'9] => {
Bump[];
AccumCheckedDigit[denom, char - '0];
};
IN ['a..'f] => {
Bump[];
AccumCheckedDigit[denom, char - 'a + 10];
};
'# => {
Bump[];
EnsureInexact[];
AccumDigit[denom, 0];
state ¬ inDenomHashes;
};
ENDCASE =>
GO TO OKReturn;
inDenomHashes =>
IF Peek[] = '#
THEN {
Bump[];
AccumDigit[denom, 0];
}
ENDCASE => ERROR;
ENDLOOP;
EXITS
BadCharReturn =>
Fail[Rope.ToRefText[IO.PutFR1["Unexpected character `%g' in number", [character[char]]]]];
BadEOFReturn => {
IF terminatedByEOF
THEN
BadEOF[]
ELSE {
tokenKind ¬ error;
error ¬ badNumber;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ "Numeric token ended prematurely";
};
ERROR Failed;
};
OKReturn => {
IF buildValue
THEN {
IF exact # false
AND exponent = 0
AND val[num].bigScale = 0
AND val[denom].bigScale = 0
AND val[denom].small = 1
AND val[num].small <=
CARD[
INT.
LAST]
THEN {
SPECIAL CASE - Small exact integer
uReal ¬
MakeFixnum[
IF negative
THEN
-INT[val[num].small]
];
}
ELSE IF exact = false
AND exponent = 0
AND val[num].bigScale = 0
AND val[denom].bigScale = 0
AND val[num].small <= 1000000
AND val[denom].small <= 1000000
THEN {
SPECIAL CASE - Small inexact real; can generate flonum with only one roundoff
float: REAL ¬ REAL[INT[val[num].small]] / REAL[INT[val[denom].small]];
IF negative
THEN
float ¬ -float;
uReal ¬ NEW[NumberRep.flonum ¬ [FALSE, flonum[float]]];
}
ELSE
uReal ¬
MakeReal[
negative: negative,
numerator: GetVal[num],
denominator: GetVal[denom],
exponent: exponent * exponentSign,
radix: 10, -- just for the exponent now
exact: exact # false];
};
RETURN;
};
END;
};
MakeInteger:
PROC [i:
INT]
RETURNS [Any] ~ {
Like Scheme.MakeFixnum, but pay attention to the setting of exact.
IF exact # false
THEN
RETURN [MakeFixnum[i]]
ELSE
RETURN [NEW[NumberRep.flonum ¬ [FALSE, flonum[REAL[i]]]]];
};
tokenKind ¬ number;
BEGIN
-- for exits
ENABLE {
Failed => GO TO Return;
IO.EndOfStream => GO TO BadEOFReturn;
};
Get the prefix.
WHILE Peek[] = '#
DO
Bump[];
SELECT Next[]
FROM
'b => Radix[2];
'd => Radix[8];
'o => Radix[10];
'x => Radix[16];
'w => Radix[16];
'e => Exact[true];
'i => Exact[false];
ENDCASE => Fail["Illegal #-specifier in number"];
ENDLOOP;
IF radix = 0 THEN radix ¬ 10;
Get the first possibly-signed real and check for the `+i', `-i', `+Ui', and `-Ui' cases.
SELECT Peek[]
FROM
'+, '- => {
negative: BOOL ~ (char = '-);
Bump[];
IF Peek[] = 'i
THEN {
Bump[];
IF buildValue
THEN
value ¬ MakeRectangular[MakeInteger[0], MakeInteger[IF negative THEN -1 ELSE 1]];
GO TO EndCheck;
};
value ¬ UReal[negative];
IF Peek[!
IO.EndOfStream =>
GO
TO Return] = 'i
THEN {
Bump[];
IF buildValue
THEN
value ¬ MakeRectangular[MakeInteger[0], value];
GO TO EndCheck;
};
};
ENDCASE =>
value ¬ UReal[FALSE];
Check for other complex number cases.
SELECT Peek[!
IO.EndOfStream =>
GO
TO Return]
FROM
'@ => {
magnitude: Any ¬ value;
Bump[];
SELECT Peek[]
FROM
'+, '- => {
Bump[];
value ¬ UReal[char = '-];
};
ENDCASE =>
value ¬ UReal[FALSE];
IF buildValue
THEN
value ¬ MakePolar[magnitude, value];
GO TO EndCheck;
};
'+, '- => {
negative: BOOL ~ (char = '-);
realPart: Any ¬ value;
Bump[];
IF Peek[] = 'i
THEN {
-- this is the `R+i' or `R-i' case
Bump[];
IF buildValue
THEN
value ¬ MakeRectangular[value, MakeInteger[IF negative THEN -1 ELSE 1]];
GO TO EndCheck;
};
value ¬ UReal[negative];
IF Next[] # 'i
THEN
Fail["Complex number does not end in `i'"];
IF buildValue
THEN
value ¬ MakeRectangular[realPart, value];
GO TO EndCheck;
};
ENDCASE => GO TO EndCheck;
ERROR; -- we should never get this far; every case should be taking one of the exits
EXITS
EndCheck => {
IF index # len
THEN {
tokenKind ¬ error;
error ¬ badNumber;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ "Numeric token contains extra characters";
};
RETURN;
};
BadEOFReturn => {
IF terminatedByEOF
THEN
BadEOF[]
ELSE {
tokenKind ¬ error;
error ¬ badNumber;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ "Numeric token ended prematurely";
};
RETURN;
};
END;
END;
};
ScanIdentifier:
PROC [allowTail:
BOOL] ~ {
SELECT NextChar[ !
IO.EndOfStream =>
GO
TO BadEOFReturn]
FROM
'# => {
IF NextChar[ !
IO.EndOfStream =>
GO
TO BadEOFReturn] # '"
THEN {
PutbackChar[];
BadIdentifier[];
RETURN;
}
ELSE {
ScanString[identifier];
RETURN;
};
};
'+, '-, '.,
IN ['0..'9] => {
Identifiers can't start with these, except for the `peculiar' ones "+", "-", and "...", which are handled elsewhere.
BadIdentifier[];
RETURN;
};
ENDCASE => {
IF
NOT identifierConstituent[char]
THEN {
PutbackChar[];
BadIdentifier[];
RETURN;
}
ELSE {
buffer: REF TEXT ¬ IF buildValue THEN RefText.ObtainScratch[100] ELSE NIL;
AddChar:
PROC ~ {
IF buildValue
THEN
buffer ¬ RefText.AppendChar[buffer, char];
};
AddChar[];
DO
SELECT NextChar[ !
IO.EndOfStream =>
EXIT]
FROM
Ascii.
SP, Ascii.
TAB, Ascii.
CR, Ascii.
LF, Ascii.
FF, '(, '), '", '; => {
PutbackChar[];
EXIT;
};
'# => {
IF allowTail
THEN {
PutbackChar[];
EXIT;
}
ELSE {
BadIdentifier[];
RETURN;
}
};
ENDCASE =>
IF identifierConstituent[char]
THEN
AddChar[]
ELSE {
BadIdentifier[];
RETURN;
};
ENDLOOP;
tokenKind ¬ identifier;
IF buildValue
THEN {
value ¬ Atom.MakeAtomFromRefText[buffer];
RefText.ReleaseScratch[buffer];
};
};
};
EXITS
BadEOFReturn => {
BadEOF[];
RETURN;
};
};
ScanIdentifierTail:
PROC ~ {
tokenKind is identifier, and, if buildValue, value is the appropriate symbol. Check for this symbol being the first part of a structured name.
IF
IO.PeekChar[stream !
IO.EndOfStream =>
GO
TO Return] = '#
THEN {
interfaceName: Any ~ value;
[] ¬ NextChar[]; -- skip the hash mark
ScanIdentifier[FALSE];
IF tokenKind = identifier
THEN {
-- as opposed to `error'
tokenKind ¬ moduleReference;
IF buildValue
THEN
value ¬ Cons[$access, Cons[interfaceName, Cons[value, NIL]]];
};
};
};
ScanString:
PROC [nonErrorKind: TokenKind] ~ {
buffer: REF TEXT ¬ IF buildValue THEN RefText.ObtainScratch[100] ELSE NIL;
AddChar:
PROC [c:
CHAR] ~ {
IF buildValue
THEN
buffer ¬ RefText.AppendChar[buffer, c];
};
tokenKind ¬ nonErrorKind;
DO
SELECT NextStringChar[ !
IO.EndOfStream =>
EXIT]
FROM
'" => {
IF buildValue
THEN {
SELECT nonErrorKind
FROM
string => value ¬ StringFromRope[Rope.FromRefText[buffer]];
identifier => value ¬ Atom.MakeAtomFromRefText[buffer];
ENDCASE => ERROR;
RefText.ReleaseScratch[buffer];
};
RETURN;
};
'\\ => {
SELECT NextChar[ !
IO.EndOfStream =>
EXIT]
FROM
'b => AddChar['\b];
'f, 'p => AddChar['\f];
'l => AddChar['\l];
'n => AddChar['\n];
'r => AddChar['\r];
't => AddChar['\t];
'", '\\ => AddChar[char];
IN ['0..'3] =>
{
ascii: [0 .. 377B] ¬ ORD[char] - ORD['0];
FOR i:
INT
IN [3..4]
DO
SELECT NextChar[ !
IO.EndOfStream =>
EXIT]
FROM
IN ['0..'7] => ascii ¬ ascii * 10B + ORD[char] - ORD['0];
ENDCASE => {
tokenKind ¬ error;
error ¬ unknownStringEscape;
value ¬ StringFromRope[Rope.FromRefText[s: token, start: token.length - i, len: i]];
token ¬ "Unknown escape sequence in string";
RETURN;
};
ENDLOOP;
AddChar[VAL[ascii]];
};
ENDCASE => {
tokenKind ¬ error;
error ¬ unknownStringEscape;
value ¬ StringFromRope[Rope.FromRefText[s: token, start: token.length - 2, len: 2]];
token ¬ "Unknown escape sequence in string";
RETURN;
};
};
ENDCASE => AddChar[char];
ENDLOOP;
We can only get here on EOF.
BadEOF[];
RETURN;
};
token ¬ buffer;
BEGIN
-- for exits
DO
-- until we get a real token
token.length ¬ 0;
SELECT NextChar[ !
IO.EndOfStream =>
GO
TO EOFReturn]
FROM
Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF => LOOP;
'; => {
tokenKind ¬ comment;
IF flushComments
THEN {
DO
SELECT NextCharSkip[ !
IO.EndOfStream =>
GO
TO EOFReturn]
FROM
Ascii.CR, Ascii.LF => EXIT;
ENDCASE;
ENDLOOP;
}
ELSE
DO
SELECT NextChar[ !
IO.EndOfStream =>
GO
TO Return]
FROM
Ascii.
CR, Ascii.
LF =>
GO
TO PutbackReturn;
Don't include the CR in the comment
ENDCASE;
ENDLOOP;
};
'( => {
tokenKind ¬ openParenthesis;
RETURN};
') => {
tokenKind ¬ closeParenthesis;
RETURN};
'' => {
tokenKind ¬ quote;
RETURN};
'` => {
tokenKind ¬ quasiquote;
RETURN};
', => {
tokenKind ¬ unquote;
IF NextChar[ !
IO.EndOfStream =>
GO
TO Return] = '@
THEN {
tokenKind ¬ unquoteSplicing;
RETURN;
}
ELSE
GO TO PutbackReturn;
};
IN ['0 .. '9] => {
ScanNumber[];
RETURN};
'. => {
-- either dot or the peculiar identifier "..." or a number.
tokenKind ¬ dot;
SELECT NextChar[ !
IO.EndOfStream =>
GO
TO Return]
FROM
Ascii.
SP, Ascii.
TAB, Ascii.
CR, Ascii.
LF, Ascii.
FF, '(, '), '", '; =>
GO TO PutbackReturn;
IN ['0 .. '9] => {
ScanNumber[];
RETURN};
'. => {
SELECT NextChar[ !
IO.EndOfStream =>
GO
TO BadEOFReturn]
FROM
Ascii.
SP, Ascii.
TAB, Ascii.
CR, Ascii.
LF, Ascii.
FF, '(, '), '", ' => {
PutbackChar[];
BadIdentifier[];
RETURN};
'. => NULL;
ENDCASE => {
BadIdentifier[];
RETURN};
tokenKind ¬ identifier;
IF buildValue
THEN
value ¬ Atom.MakeAtom["..."];
Check that the token is properly delimited.
SELECT
IO.PeekChar[stream !
IO.EndOfStream =>
GO
TO Return]
FROM
Ascii.
SP, Ascii.
TAB, Ascii.
CR, Ascii.
LF, Ascii.
FF, '(, '), '", '; =>
RETURN;
'# => {
ScanIdentifierTail[];
RETURN};
ENDCASE => {
BadIdentifier[];
RETURN};
};
ENDCASE => {
BadIdentifier[];
RETURN};
};
'+, '- => {
-- either a peculiar identifier ("+" or "-") or a number.
tokenKind ¬ identifier;
IF buildValue
THEN
value ¬ Atom.MakeAtomFromChar[char];
SELECT
IO.PeekChar[stream !
IO.EndOfStream =>
GO
TO Return]
FROM
Ascii.
SP, Ascii.
TAB, Ascii.
CR, Ascii.
LF, Ascii.
FF, '(, '), '", '; =>
RETURN;
'# => {
ScanIdentifierTail[];
RETURN};
ENDCASE => {
ScanNumber[];
RETURN};
};
'#=> {
SELECT NextChar[ !
IO.EndOfStream =>
GO
TO BadEOFReturn]
FROM
't, 'f => {
tokenKind ¬ boolean;
IF buildValue
THEN
value ¬ IF char = 't THEN true ELSE false;
RETURN};
'b, 'o, 'd, 'x, 'w, 'i, 'e => {
ScanNumber[];
RETURN};
'\\ => {
found: BOOL;
buffer: REF TEXT ¬ RefText.ObtainScratch[10];
DO
SELECT NextStringChar[ !
IO.EndOfStream =>
IF buffer.length = 0
THEN
GO
TO BadEOFReturn
ELSE
EXIT]
FROM
Ascii.
SP, Ascii.
TAB, Ascii.
CR, Ascii.
LF, Ascii.
FF, '(, '), '", '; => {
IF buffer.length = 0
THEN
buffer ¬ RefText.AppendChar[buffer, char]
EXIT;
};
ENDCASE => buffer ¬ RefText.AppendChar[buffer, char];
ENDLOOP;
tokenKind ¬ character;
We must treat one-character names specially because the SymTab below is case-insensitive.
IF buffer.length = 1
THEN {
IF buildValue
THEN
value ¬ MakeChar[buffer[0]];
RETURN;
};
[found, value] ¬ SymTab.Fetch[x: charForName, key: RefText.TrustTextAsRope[buffer]];
RefText.ReleaseScratch[buffer];
IF
NOT found
THEN {
tokenKind ¬ error;
error ¬ unknownCharacterName;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ "Unknown named character"
};
RETURN};
'( => {
tokenKind ¬ openVector;
RETURN};
'" => {
ScanString[identifier];
IF tokenKind = identifier
THEN
-- as opposed to `error'
ScanIdentifierTail[];
RETURN};
'! => {
found: BOOL;
buffer: REF TEXT ¬ RefText.ObtainScratch[10];
DO
SELECT NextStringChar[ !
IO.EndOfStream =>
IF buffer.length = 0
THEN
GO
TO BadEOFReturn
ELSE
EXIT]
FROM
Ascii.
SP, Ascii.
TAB, Ascii.
CR, Ascii.
LF, Ascii.
FF, '(, '), '", '; => {
IF buffer.length = 0
THEN
buffer ¬ RefText.AppendChar[buffer, char]
EXIT;
};
ENDCASE => buffer ¬ RefText.AppendChar[buffer, char];
ENDLOOP;
tokenKind ¬ character;
[found, value] ¬ SymTab.Fetch[x: primitiveSyntaxForName, key: RefText.TrustTextAsRope[buffer]];
RefText.ReleaseScratch[buffer];
IF
NOT found
THEN {
tokenKind ¬ error;
error ¬ unknownPrimitiveSyntax;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ "Unknown primitive syntax marker"
};
RETURN};
ENDCASE => {
tokenKind ¬ error;
error ¬ unknownHashDispatch;
value ¬ StringFromRope[Rope.FromRefText[token]];
token ¬ "Unknown # token";
RETURN};
};
'"=> {
ScanString[string];
RETURN};
ENDCASE => {
PutbackChar[];
ScanIdentifier[TRUE];
IF tokenKind = identifier
THEN
ScanIdentifierTail[];
RETURN};
ENDLOOP;
EXITS
Return => { RETURN };
PutbackReturn => {
PutbackChar[];
RETURN};
BadEOFReturn => {
BadEOF[];
RETURN};
EOFReturn => {
tokenKind ¬ endOfFile;
RETURN};
END;
};
primitiveSyntaxForName: SymTab.Ref ~ InitPrimitiveSyntax[];
InitPrimitiveSyntax:
PROC
RETURNS [table: SymTab.Ref] ~ {
table ¬ SymTab.Create[case: FALSE];
FOR ps: PrimitiveSyntaxRep
IN PrimitiveSyntaxRep
DO
[] ¬ SymTab.Store[x: table, key: Atom.GetPName[symbolForPrimitiveSyntaxRep[ps]], val: primitiveSyntaxForPrimitiveSyntaxRep[ps]];
ENDLOOP;
};
charForName: SymTab.Ref ~ SymTab.Create[case: FALSE];
nameForChar: REF ARRAY CHAR OF ROPE ~ InitCharNames[];
InitCharNames:
PROC
RETURNS [
REF
ARRAY
CHAR
OF
ROPE] ~ {
a: REF ARRAY CHAR OF ROPE ¬ NEW[ARRAY CHAR OF ROPE];
AddName:
PROC [char:
CHAR, name:
ROPE] ~ {
[] ¬ SymTab.Store[x: charForName, key: name, val: MakeChar[char]];
a[char] ¬ name;
};
Names are added in increasing order of preference on output. That is, the last name assigned to each character is the one to be used on output.
FOR c:
CHAR
IN
CHAR
DO
AddName[c, Rope.Flatten[IO.PutFR1["%03b", [integer[ORD[c]]]]]];
ENDLOOP;
AddName['(, Rope.Flatten["OpenPar"]];
AddName['(, Rope.Flatten["OpenParen"]];
AddName['), Rope.Flatten["ClosePar"]];
AddName['), Rope.Flatten["CloseParen"]];
FOR c:
CHAR
IN (' ..'~]
DO
AddName[c, Rope.FromChar[c]];
ENDLOOP;
AddName['\r, Rope.Flatten["Return"]];
AddName['\l, Rope.Flatten["LineFeed"]];
AddName['\n, Rope.Flatten["Newline"]];
AddName['\t, Rope.Flatten["Tab"]];
AddName[' , Rope.Flatten["Space"]];
AddName['\f, Rope.Flatten["FormFeed"]];
AddName['\f, Rope.Flatten["Page"]];
RETURN [a]
};
identifierConstituent: REF ARRAY CHAR OF BOOL ~ InitIdentifierConstituent[];
InitIdentifierConstituent:
PROC
RETURNS [a:
REF
ARRAY
CHAR
OF
BOOL] ~ {
a ¬ NEW[ARRAY CHAR OF BOOL ¬ ALL[FALSE]];
FOR c:
CHAR
IN ['0..'9]
DO
a[c] ¬ TRUE;
ENDLOOP;
FOR c:
CHAR
IN ['A..'Z]
DO
a[c] ¬ TRUE;
ENDLOOP;
FOR c:
CHAR
IN ['a..'z]
DO
a[c] ¬ TRUE;
ENDLOOP;
a['!] ¬ TRUE;
a['$] ¬ TRUE;
a['%] ¬ TRUE;
a['&] ¬ TRUE;
a['*] ¬ TRUE;
a['/] ¬ TRUE;
a[':] ¬ TRUE;
a['<] ¬ TRUE;
a['=] ¬ TRUE;
a['>] ¬ TRUE;
a['?] ¬ TRUE;
a['~] ¬ TRUE;
a['←] ¬ TRUE;
a['^] ¬ TRUE;
a['.] ¬ TRUE;
a['+] ¬ TRUE;
a['-] ¬ TRUE;
};
Testing
Test:
PROC [name:
ROPE, out:
IO.
STREAM] ~ {
in: IO.STREAM ~ FS.StreamOpen[name];
buffer: REF TEXT ¬ NEW[TEXT[10]];
token: REF TEXT ¬ NEW[TEXT[10]];
tokenKind: TokenKind;
error: ScanningError;
value: Any;
DO
[tokenKind, token, error, value] ¬ GetToken[in, buffer, FALSE, TRUE];
SELECT tokenKind
FROM
identifier, moduleReference, boolean, number, character, string, primitiveSyntax =>
IO.PutF[out, "%g: %g\n", [refAny[NEW[TokenKind ¬ tokenKind]]], [refAny[value]]];
endOfFile => EXIT;
error =>
IO.PutF[out, "**Error (%g)** %g\n\t%g\n", [refAny[NEW[ScanningError ¬ error]]], [text[token]], [refAny[value]]];
comment => IO.PutF1[out, "%g\n", [text[token]]];
ENDCASE =>
IO.PutF[out, "%g: %g\n", [refAny[NEW[TokenKind ¬ tokenKind]]], [text[token]]];
ENDLOOP;
IO.Close[in];
};
END.