Scanner.mesa
Copyright © 1985 by Xerox Corporation. All rights reserved.
Satterthwaite, May 20, 1986 11:07:17 am PDT
Maxwell, August 11, 1983 2:09 pm
Paul Rovner, September 22, 1983 3:48 pm
Russ Atkinson (RRA) December 3, 1986 6:25:58 pm PST
DIRECTORY
Ascii: TYPE USING [BS, CR, FF, LF, TAB],
Basics: TYPE USING [charsPerWord, Word],
ConvertUnsafe: TYPE USING [SubString],
IO: TYPE USING [
card, EndOf, GetChar, GetIndex, Put, PutChar, rope, SetIndex, STREAM, UnsafeGetBlock],
LiteralOps: TYPE USING [FindDescriptor, Find, FindString],
P1: TYPE USING [Index, Token, Value, nullValue],
ParseTable: TYPE USING [
HashIndex, HashTableRef, IndexTableRef, ScanTableRef, TableRef, TSymbol, VocabularyRef,
endMarker, tokenARROW, tokenATOM, tokenCHAR, tokenDOT, tokenDOTS, tokenEQUAL,
tokenGE, tokenGREATER, tokenID, tokenLE, tokenLESS, tokenFLNUM, tokenLNUM,
tokenLSTR, tokenMINUS, tokenNE, tokenNUM, tokenSTR, tokenTILDE],
Real: TYPE USING [RealException, PairToReal],
RefText: TYPE USING [Append],
Rope: TYPE USING [ROPE],
SymbolOps: TYPE USING [EnterString],
VM: TYPE USING [wordsPerPage];
Scanner: PROGRAM
IMPORTS IO, LiteralOps, Real, RefText, SymbolOps
EXPORTS P1 = {
OPEN ParseTable;
table installation
tablePtr: ParseTable.TableRef;
hashTab: HashTableRef;
scanTab: ScanTableRef;
vocab: VocabularyRef;
vocabIndex: IndexTableRef;
InstallScanTable: PUBLIC PROC[base: ParseTable.TableRef] = {
tablePtr ← base;
hashTab ← @tablePtr[tablePtr.scanTable.hashTab];
scanTab ← @tablePtr[tablePtr.scanTable.scanTab];
vocab ← LOOPHOLE[@tablePtr[tablePtr.scanTable.vocabBody]];
vocabIndex ← @tablePtr[tablePtr.scanTable.vocabIndex]};
scanner state
stream: IO.STREAMNIL;  -- the input stream
streamOrigin: P1.Index; -- FileStream.FileByteIndex
Logger: PROC[PROC [log: IO.STREAM]] ← NIL;
textPages: NAT ~ 6;
textWords: NAT ~ textPages*VM.wordsPerPage;
textChars: NAT ~ textWords*Basics.charsPerWord;
charsPerPage: NAT ~ Basics.charsPerWord*VM.wordsPerPage;
TextBuffer: TYPE ~ PACKED ARRAY [0..textChars) OF CHAR;
tB: REF TextBuffer;
tI, tMax: [0..textChars];
tOrigin, tLimit: P1.Index;
tEnded: BOOL;
FillBuffer: PROC ~ {
tOrigin ← tLimit;
IF tEnded THEN tMax ← 0
ELSE {
tMax ← stream.UnsafeGetBlock[[LOOPHOLE[tB], 0, textChars]].nBytesRead;
IF tMax < textChars THEN tEnded ← TRUE;
tLimit ← tOrigin + tMax};
IF tMax = 0 THEN {tB[0] ← '\000; tMax ← 1};
tI ← 0};
buffer: REF TEXTNIL;  -- token assembly area
iMax: CARDINAL;   -- iMax = buffer.maxLength
desc: ConvertUnsafe.SubString; -- initial buffer segment
nTokens: NAT;    -- token count
nErrors: NAT;    -- lexical errors
BufferOverflow: ERROR ~ CODE;
ExpandBuffer: PROC ~ {
oldBuffer: REF TEXT ← buffer;
IF oldBuffer.length > 2000 THEN ERROR BufferOverflow;
buffer ← NEW[TEXT[2*buffer.length]];
desc.base ← LOOPHOLE[buffer, LONG POINTER];
buffer ← RefText.Append[to~buffer, from~oldBuffer];
iMax ← buffer.length ← buffer.maxLength};
char: CHAR;  -- current (most recently scanned) character
qDot: BOOL;  -- used to resolved decimal point vs. interval
NextChar: PROC ~ {
IF (tI←tI+1) = tMax THEN FillBuffer[];
char ← tB[tI];
};
NextCharInline: PROC RETURNS [eof: BOOL] ~ {
IF (tI←tI+1) = tMax THEN {IF tEnded THEN RETURN [TRUE]; FillBuffer[]};
char ← tB[tI];
RETURN [FALSE];
};
NextToken: PUBLIC PROC RETURNS[token: P1.Token] ~ {
OPEN token;
DO
WHILE char IN ['\000..' ] DO
SELECT char FROM
'\000 => {  -- ^@^@ is Tioga escape seq
IF NextCharInline[] THEN GO TO EndFile;
IF char = '\000 THEN GO TO EndFile};
ENDCASE =>
IF NextCharInline[] THEN GO TO EndFile;
ENDLOOP;
index ← tOrigin + tI;
value ← P1.nullValue;
{
SELECT char FROM
'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm, 'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z => {
i: CARDINAL ← 0;
DO
buffer[i] ← char;
IF (tI←tI+1) = tMax THEN FillBuffer[];
char ← tB[tI];
SELECT char FROM
IN ['a..'z], IN ['A..'Z], IN ['0..'9] =>
IF (i ← i+1) >= iMax THEN ExpandBuffer[];
ENDCASE => EXIT;
ENDLOOP;
desc.length ← i+1;
class ← tokenID; value.r ← SymbolOps.EnterString[desc];
GO TO GotNext};
'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M, 'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => {
i: CARDINAL ← 0;
uId: BOOLTRUE;
first, last: NAT ← char.ORD;
DO
buffer[i] ← char;
IF (tI←tI+1) = tMax THEN FillBuffer[];
char ← tB[tI];
SELECT char FROM
IN ['A..'Z] => {
last ← char.ORD; IF (i ← i+1) >= iMax THEN ExpandBuffer[]};
IN ['a..'z], IN ['0..'9] => {
uId ← FALSE; IF (i ← i+1) >= iMax THEN ExpandBuffer[]};
ENDCASE => EXIT;
ENDLOOP;
i ← i+1;
IF uId THEN {
h: HashIndex ← ((first*128-first) + last) MOD HashIndex.LAST + 1;
j, s1, s2: CARDINAL;
WHILE (j ← hashTab[h].symbol) # 0 DO
IF vocabIndex[j]-(s2←vocabIndex[j-1]) = i THEN
FOR s1 IN [0 .. i) DO
IF buffer[s1] # vocab.text[s2] THEN EXIT;
s2 ← s2+1;
REPEAT
FINISHED => {class ← j; GO TO GotNext};
ENDLOOP;
IF (h ← hashTab[h].link) = 0 THEN EXIT;
ENDLOOP};
desc.length ← i;
class ← tokenID; value.r ← SymbolOps.EnterString[desc];
GO TO GotNext};
'0, '1, '2, '3, '4, '5, '6, '7, '8, '9 => {
valid: BOOL;
[class, value, valid] ← CollectNumber[i~0];
IF ~valid THEN ScanError[$number, index];
GO TO GotNext};
',, ';, ':, '←, '#, '+, '*, '/, '^, '@, '!, '(, '), '[, '], '{, '} => {
class ← scanTab[char];
GO TO GetNext};
'' => {
c: CHAR;
valid, advance: BOOL;
NextChar[];
[c, valid, advance] ← Escape[];
IF ~valid THEN ScanError[$escape, index + 1];
class ← tokenCHAR; value.r ← LiteralOps.Find[c.ORD];
IF advance THEN GO TO GetNext ELSE GO TO GotNext};
'" => {
i: CARDINAL ← 0;
valid: BOOL;
advance: BOOLTRUE;
DO
IF advance THEN
IF NextCharInline[] THEN GO TO EOFEnd;
SELECT char FROM
'" => {
IF (tI←tI+1) = tMax THEN FillBuffer[];
char ← tB[tI];
IF char # '" THEN GO TO QuoteEnd};
ENDCASE;
IF i >= iMax THEN ExpandBuffer[
! BufferOverflow => {ScanError[$string, index]; i ← 0; CONTINUE}];
[buffer[i], valid, advance] ← Escape[]; i ← i+1;
IF ~valid THEN ScanError[$escape, tOrigin + tI];
REPEAT
QuoteEnd => NULL;
EOFEnd => {ScanError[$string, index]; FillBuffer[]; char ← tB[tI]};
ENDLOOP;
desc.length ← i;
value.r ← LiteralOps.FindString[desc];
IF char = 'l OR char = 'L THEN {class ← tokenLSTR; GO TO GetNext}
ELSE {class ← tokenSTR; GO TO GotNext}};
'$, '\244 => {
note: 244c = ¤ in the Xerox Character Code standard
i: CARDINAL;
i ← 0; NextChar[];
SELECT char FROM
IN ['a..'z], IN ['A..'Z] => NULL;
ENDCASE => ScanError[$atom, index];
DO
SELECT char FROM
IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {
IF i >= iMax THEN ExpandBuffer[]; buffer[i] ← char; i ← i+1};
ENDCASE => EXIT;
NextChar[];
ENDLOOP;
desc.length ← i;
class ← tokenATOM; value.r ← SymbolOps.EnterString[desc];
GO TO GotNext};
'- => {
NextChar[];
IF char # '- THEN {class ← tokenMINUS; GO TO GotNext};
char ← '\000;
DO
pChar: CHAR ~ char;
IF NextCharInline[] THEN GO TO EndFile;
SELECT char FROM
'- => IF pChar = '- THEN EXIT;
'\n => EXIT;
ENDCASE;
ENDLOOP;
NextChar[]};
'. => {
IF qDot THEN {
qDot ← FALSE; index ← index-1; class ← tokenDOTS; GO TO GetNext};
NextChar[];
SELECT char FROM
'. => {class ← tokenDOTS; GO TO GetNext};
IN ['0..'9] => {
valid: BOOL;
buffer[0] ← '.;
[class, value, valid] ← CollectNumber[i~1, float~TRUE];
IF ~valid THEN ScanError[$number, index];
GO TO GotNext};
ENDCASE => {class ← tokenDOT; GO TO GotNext}};
'= => {
NextChar[];
IF char = '> THEN {class ← tokenARROW; GO TO GetNext}
ELSE {class ← tokenEQUAL; GO TO GotNext}};
'< => {
NextChar[];
SELECT char FROM
'= => {class ← tokenLE; GO TO GetNext};
'< => GO TO ScanComment;
ENDCASE => {class ← tokenLESS; GO TO GotNext};
};
'> => {
NextChar[];
IF char = '= THEN {class ← tokenGE; GO TO GetNext}
ELSE {class ← tokenGREATER; GO TO GotNext}};
'~ => {
NextChar[];
SELECT char FROM
'= => {class ← tokenNE; GO TO GetNext};
'< => {class ← tokenGE; GO TO GetNext};
'> => {class ← tokenLE; GO TO GetNext}
ENDCASE => {class ← tokenTILDE; GO TO GotNext}};
'\253 => GO TO ScanComment;
« in the Xerox Character Code Standard
'\254 => {class ← scanTab['←]; GO TO GetNext};
¬ in the Xerox Character Code Standard
'\255 => {class ← scanTab['^]; GO TO GetNext};
­ in the Xerox Character Code Standard
'\264 => {class ← scanTab['*]; GO TO GetNext};
´ in the Xerox Character Code Standard (multiplication)
'\270 => {class ← scanTab['/]; GO TO GetNext};
¸ in the Xerox Character Code Standard (division)
ENDCASE => {
class ← scanTab[char];
IF class # 0 THEN GO TO GetNext;
NextChar[];
ScanError[$char, index];
};
EXITS ScanComment => {
state: {plain, leftBrocket, rightBrocket} ← plain;
nest: CARDINAL ← 1;
DO
IF NextCharInline[] THEN GO TO EndFile;
SELECT char FROM
'> => SELECT state FROM
plain, leftBrocket => state ← rightBrocket;
rightBrocket => {state ← plain; nest ← nest - 1; IF nest = 0 THEN EXIT};
ENDCASE;
'< => SELECT state FROM
plain, rightBrocket => state ← leftBrocket;
leftBrocket => {state ← plain; nest ← nest + 1};
ENDCASE;
'\253 => {
« in the Xerox Character Code Standard
state ← plain;
nest ← nest + 1;
};
'\273 => {
» in the Xerox Character Code Standard
state ← plain; nest ← nest - 1; IF nest = 0 THEN EXIT;
};
Ascii.CR purposely don't count lines here
ENDCASE => state ← plain;
ENDLOOP;
NextChar[];
};
};
REPEAT
GetNext => {IF (tI←tI+1) = tMax THEN FillBuffer[]; char ← tB[tI]};
GotNext => NULL;
EndFile => {
class ← endMarker; index ← tOrigin + (tI-1); value ← P1.nullValue;
UNTIL tEnded DO FillBuffer[] ENDLOOP;  -- flush stream
FillBuffer[]; char ← tB[tI]};
ENDLOOP;
nTokens ← nTokens + 1;
};
numerical conversion
LongLit: TYPE ~ LONG UNSPECIFIED;
endMark: CHAR ~ '\000;
CollectNumber: PROC[i: CARDINAL, float: BOOLFALSE]
RETURNS[class: TSymbol, value: P1.Value, valid: BOOL] ~ {
hexCount: NAT ← 0;
hexSig: PACKED ARRAY CHAR['a..'h] OF {F, T} ← ALL[F];
v: LongLit;
Accept: PROC ~ INLINE {
buffer[i] ← char;
IF (i ← i+1) >= iMax THEN ExpandBuffer[];
NextChar[]};
maxWord: LONG CARDINAL ~ Basics.Word.LAST;
class ← tokenLNUM;
DO
SELECT char FROM
IN ['0..'9] => Accept[];
'e, 'E => {
hexSig['e] ← T; hexCount ← hexCount + 1; Accept[];
IF hexCount = 1 AND char = '+ OR char = '- THEN {
float ← TRUE; Accept[]}};
IN ['a..'f] => {hexSig[char] ← T; hexCount ← hexCount+1; Accept[]};
IN ['A..'F] => {
hexSig[char+('a-'A)] ← T; hexCount ← hexCount+1; Accept[]};
'h, 'H => {hexSig['h] ← T; hexCount ← hexCount+1; Accept[]};
'. => {
IF hexCount # 0 OR float THEN EXIT;
NextChar[];
IF char = '. THEN {qDot ← TRUE; EXIT};
float ← TRUE;
buffer[i] ← '.;
IF (i ← i+1) >= iMax THEN ExpandBuffer[]};
ENDCASE => EXIT;
ENDLOOP;
buffer[i] ← endMark;
SELECT TRUE FROM
float => {class ← tokenFLNUM; [v, valid] ← ScanFloating[buffer]};
(hexSig['h] = T) => [v, valid] ← ScanHex[buffer];
ENDCASE =>
SELECT hexCount FROM
0 => [v, valid] ← ScanDecimal[buffer];
1 =>
SELECT hexSig FROM
[F,T,F,F,F,F,F,F] => [v, valid] ← ScanOctal[buffer];
[F,F,T,F,F,F,F,F] => {
class ← tokenCHAR;
[v, valid] ← ScanOctalChar[buffer]};
[F,F,F,T,F,F,F,F] => [v, valid] ← ScanDecimal[buffer];
[F,F,F,F,T,F,F,F] => {
class ← tokenFLNUM;
[v, valid] ← ScanFloating[buffer]};
ENDCASE => [v, valid] ← ScanHex[buffer];
ENDCASE => [v, valid] ← ScanHex[buffer];
SELECT class FROM
tokenCHAR => value ← EnterLit[v, FALSE];
tokenFLNUM => value ← EnterLit[v];
ENDCASE =>
IF LOOPHOLE[v, LONG CARDINAL] > maxWord THEN
value ← EnterLit[v]
ELSE {class ← tokenNUM; value ← EnterLit[v, FALSE]};
RETURN};
Digit: ARRAY CHAR ['0..'9] OF [0..9] ~ [0,1,2,3,4,5,6,7,8,9];
HexDigit: ARRAY CHAR ['A..'F] OF [0..15] ~ [10,11,12,13,14,15];
AppendDecimal: PROC[v: LONG CARDINAL, digit: CHAR ['0..'9]]
RETURNS[newV: LONG CARDINAL, valid: BOOL] ~ {
maxV: LONG CARDINAL ~ 429496729; -- (2**32-1)/10
maxD: NAT ~ 5;  -- (2**32-1) MOD 10
d: [0..9] ~ Digit[digit];
valid ← v < maxV OR (v = maxV AND d <= maxD);
newV ← 10*v + d;
RETURN};
AppendOctal: PROC[v: LONG CARDINAL, digit: CHAR ['0..'7]]
RETURNS[newV: LONG CARDINAL, valid: BOOL] ~ {
maxV: LONG CARDINAL ~ 3777777777b; -- (2**32-1)/8
d: [0..7] ~ Digit[digit];
valid ← (v <= maxV);
newV ← 8*v + d;
RETURN};
AppendHex: PROC[v: LONG CARDINAL, digit: CHAR ['0..'F]]
RETURNS[newV: LONG CARDINAL, valid: BOOL] ~ {
maxV: LONG CARDINAL ~ 0FFFFFFFh; -- (2**32-1)/16
d: [0..15] ~ IF digit IN ['0..'9] THEN Digit[digit] ELSE HexDigit[digit];
valid ← (v <= maxV);
newV ← 16*v + d;
RETURN};
AppendToScale: PROC[v: CARDINAL, digit: CHAR ['0..'9]]
RETURNS[newV: CARDINAL, valid: BOOL] ~ {
maxV: NAT ~ 6553;  -- (2**16-1)/10
maxD: NAT ~ 5;  -- (2**16-1) MOD 10
d: [0..9] ~ Digit[digit];
valid ← v < maxV OR (v = maxV AND d <= maxD);
newV ← 10*v + d;
RETURN};
ValidFraction: PROC[v: LONG CARDINAL, digit: CHAR ['0..'9]] RETURNS[BOOL] ~ {
maxV: LONG CARDINAL ~ 214748364; -- (2**31-1)/10
maxD: NAT ~ 7;  -- (2**31-1) MOD 10
RETURN[v < maxV OR (v = maxV AND Digit[digit] <= maxD)]};
ScanDecimal: PROC[s: REF TEXT] RETURNS[value: LongLit, valid: BOOLTRUE] ~ {
c: CHAR;
i: CARDINAL ← 0;
v: LONG CARDINAL ← 0;
IF s[i] NOT IN ['0..'9] THEN valid ← FALSE;
WHILE (c ← s[i]) IN ['0..'9] DO
IF valid THEN [v, valid] ← AppendDecimal[v, c];
i ← i+1;
ENDLOOP;
IF c = 'd OR c = 'D THEN {
scale: CARDINAL ← 0;
WHILE (c ← s[i←i+1]) IN ['0..'9] DO
IF valid THEN [scale, valid] ← AppendToScale[scale, c];
ENDLOOP;
THROUGH [1 .. scale] WHILE valid DO
[v, valid] ← AppendDecimal[v, '0] ENDLOOP};
IF c # endMark THEN valid ← FALSE;
value ← v;
RETURN};
ScanOctal: PROC[s: REF TEXT] RETURNS[value: LongLit, valid: BOOLTRUE] ~ {
c: CHAR;
i: CARDINAL ← 0;
v: LONG CARDINAL ← 0;
IF s[i] NOT IN ['0..'7] THEN valid ← FALSE;
WHILE (c ← s[i]) IN ['0..'7] DO
IF valid THEN [v, valid] ← AppendOctal[v, c];
i ← i+1;
ENDLOOP;
IF c = 'b OR c = 'B THEN {
scale: CARDINAL ← 0;
WHILE (c ← s[i←i+1]) IN ['0..'9] DO
IF valid THEN [scale, valid] ← AppendToScale[scale, c];
ENDLOOP;
THROUGH [1 .. scale] WHILE valid DO
[v, valid] ← AppendOctal[v, '0] ENDLOOP};
IF c # endMark THEN valid ← FALSE;
value ← v;
RETURN};
ScanOctalChar: PROC[s: REF TEXT] RETURNS[value: LongLit, valid: BOOLTRUE] ~ {
c: CHAR;
maxChar: NAT ~ 377b;
i: CARDINAL ← 0;
v: LONG CARDINAL ← 0;
IF s[i] NOT IN ['0..'7] THEN valid ← FALSE;
WHILE (c ← s[i]) IN ['0..'7] DO
IF valid THEN [v, valid] ← AppendOctal[v, c];
i ← i+1;
ENDLOOP;
IF c = 'c OR c = 'C THEN c ← s[i←i+1] ELSE valid ← FALSE;
IF c # endMark OR v NOT IN [0 .. maxChar] THEN valid ← FALSE;
value ← v;
RETURN};
ScanHex: PROC[s: REF TEXT] RETURNS[value: LongLit, valid: BOOLTRUE] ~ {
c: CHAR;
i: CARDINAL ← 0;
v: LONG CARDINAL ← 0;
IF s[i] NOT IN ['0..'9] THEN valid ← FALSE;
DO
SELECT (c ← s[i]) FROM
IN ['0..'9], IN ['A..'F] =>
IF valid THEN [v, valid] ← AppendHex[v, c];
IN ['a..'f] =>
IF valid THEN [v, valid] ← AppendHex[v, VAL[(c.ORD-'a.ORD)+'A.ORD]];
ENDCASE => EXIT;
i ← i + 1;
ENDLOOP;
IF c = 'h OR c = 'H THEN {
scale: CARDINAL ← 0;
WHILE (c ← s[i←i+1]) IN ['0..'9] DO
IF valid THEN [scale, valid] ← AppendToScale[scale, c];
ENDLOOP;
THROUGH [1 .. scale] WHILE valid DO
[v, valid] ← AppendHex[v, '0] ENDLOOP};
IF c # endMark THEN valid ← FALSE;
value ← v;
RETURN};
ScanFloating: PROC[s: REF TEXT] RETURNS[value: LongLit, valid: BOOLTRUE] ~ {
c: CHAR;
i: CARDINAL ← 0;
v: LONG CARDINAL ← 0;
exp: INTEGER ← 0;
WHILE (c ← s[i]) IN ['0..'9] DO
valid ← valid AND ValidFraction[v, c];
IF valid THEN v ← AppendDecimal[v, c].newV
ELSE exp ← exp + 1; -- should round
i ← i+1;
ENDLOOP;
IF c = '. THEN {
i ← i+1;
IF s[i] NOT IN ['0..'9] THEN valid ← FALSE;
WHILE (c ← s[i]) IN ['0..'9] DO
valid ← valid AND ValidFraction[v, c];
IF valid THEN {[v, valid] ← AppendDecimal[v, c]; exp ← exp-1}
ELSE NULL; -- should round
i ← i+1;
ENDLOOP};
valid ← TRUE;
IF c = 'e OR c = 'E THEN {
scale: INTEGER ← 0;
op: {plus, minus} ← $plus;
i ← i + 1;
SELECT s[i] FROM
'+ => i ← i+1;
'- => {op ← $minus; i ← i+1};
ENDCASE;
IF s[i] NOT IN ['0..'9] THEN valid ← FALSE;
WHILE (c ← s[i]) IN ['0..'9] DO
IF valid THEN [scale, valid] ← AppendToScale[scale, c];
i ← i+1;
ENDLOOP;
exp ← IF op = $plus THEN exp + scale ELSE exp - scale}; -- need overflow check
IF c # endMark THEN valid ← FALSE;
value ← Real.PairToReal[v, exp
! Real.RealException => {valid ← FALSE; RESUME}];
RETURN};
EnterLit: PROC[v: LongLit, long: BOOLTRUE] RETURNS[P1.Value] ~ {
vRep: ARRAY [0..LongLit.SIZE) OF WORDLOOPHOLE[v];
RETURN[[ref[IF long
THEN LiteralOps.FindDescriptor[DESCRIPTOR[vRep]]
ELSE LiteralOps.Find[vRep[0]]]]]
};
character and string constants
escapeMark: CHAR ~ '\\;
Escape: PROC RETURNS[c: CHAR, valid, advance: BOOLTRUE] ~ {
c ← char;
IF c = escapeMark THEN {
NextChar[];
SELECT char FROM
'n, 'N => c ← Ascii.CR;
'r, 'R => c ← Ascii.CR;
'l, 'L => c ← Ascii.LF;
't, 'T => c ← Ascii.TAB;
'b, 'B => c ← Ascii.BS;
'f, 'F => c ← Ascii.FF;
'', '", escapeMark => c ← char;
IN ['0 .. '7] => {
nc, v: CARDINAL ← 0;
DO
IF ~(char IN ['0..'7]) THEN {valid ← advance ← FALSE; EXIT};
v ← 8*v + Digit[char];
IF (nc ← nc+1) = 3 THEN EXIT;
NextChar[];
ENDLOOP;
IF v > 377b THEN {valid ← FALSE; v ← 0};
c ← v + 0c};
ENDCASE => valid ← advance ← FALSE};
RETURN};
initialization/finalization
ScanInit: PUBLIC PROC[
source: IO.STREAM,
logger: PROC [PROC[log: IO.STREAM]]] ~ {
stream ← source; Logger ← logger;
IF buffer = NIL THEN buffer ← NEW[TEXT[256]];
desc.base ← LOOPHOLE[buffer, LONG POINTER]; desc.offset ← 0;
iMax ← buffer.length ← buffer.maxLength;
streamOrigin ← stream.GetIndex[];
tB ← NEW[TextBuffer];
tOrigin ← tLimit ← 0; tMax ← 0; tEnded ← FALSE;
FillBuffer[]; char ← tB[tI]; qDot ← FALSE;
nTokens ← nErrors ← 0};
ScanStats: PUBLIC PROC RETURNS[NAT, NAT] ~ {
RETURN[nTokens, nErrors]};
ScanReset: PUBLIC PROC ~ {
IF buffer # NIL THEN FREE[@buffer];
IF tB # NIL THEN FREE[@tB];
stream ← NIL; Logger ← NIL};
error handling
ResetScanIndex: PUBLIC PROC[index: P1.Index] RETURNS[success: BOOL] ~ {
IF ~(index IN [tOrigin .. tLimit)) THEN {
page: CARDINAL ~ index/charsPerPage;
tOrigin ← tLimit ← page*charsPerPage;
tMax ← 0; tEnded ← FALSE;
stream.SetIndex[streamOrigin + tOrigin];
FillBuffer[]};
tI ← index - tOrigin;
IF tI >= tMax THEN FillBuffer[]; char ← tB[tI]; RETURN[TRUE]};
ErrorCode: TYPE ~ {number, string, char, atom, escape};
ScanError: PROC[code: ErrorCode, tokenIndex: P1.Index] ~ {
Inner: PROC[log: IO.STREAM] ~ {
ErrorContext[log,
SELECT code FROM
$number => "invalid number",
$string => "string unterminated or too long",
$char => "invalid character",
$atom => "invalid atom",
$escape => "invalid escape sequence",
ENDCASE => NIL,
tokenIndex];
log.PutChar['\n]};
nErrors ← nErrors + 1;
Logger[Inner]};
ErrorContext: PUBLIC PROC[
to: IO.STREAM, message: Rope.ROPE, tokenIndex: P1.Index] ~ {
saveIndex: P1.Index ~ stream.GetIndex[];
origin: P1.Index ~ streamOrigin + tokenIndex;
start, lineIndex: P1.Index ← origin;
char: CHAR;
n: [1..100];
FOR n IN [1..100] UNTIL lineIndex = 0 DO
lineIndex ← lineIndex - 1;
stream.SetIndex[lineIndex];
IF stream.GetChar[] = '\n THEN EXIT;
start ← lineIndex;
ENDLOOP;
stream.SetIndex[start];
FOR n IN [1..100] UNTIL stream.EndOf[] DO
char ← stream.GetChar[];
SELECT char FROM
'\n, '\032 => EXIT;
ENDCASE => to.PutChar[char];
ENDLOOP;
to.PutChar['\n];
stream.SetIndex[start];
UNTIL stream.GetIndex[] = origin OR stream.EndOf[] DO
char ← stream.GetChar[];
to.PutChar[IF char = '\t THEN '\t ELSE ' ];
ENDLOOP;
to.Put[IO.rope["^ "], IO.rope[message]];
to.Put[IO.rope[" ["], IO.card[tokenIndex]];
to.PutChar[']]; to.PutChar['\n];
stream.SetIndex[saveIndex]};
}.