MimScanner.mesa
Copyright Ó 1985, 1986, 1987, 1988, 1989, 1991 by Xerox Corporation. All rights reserved.
Satterthwaite, June 17, 1986 2:26:36 pm PDT
Russ Atkinson (RRA) January 4, 1991 1:45 am PST
DIRECTORY
Ascii USING [BS, CR, FF, LF, TAB],
ConstArith USING [Add, Compare, Const, Div, FromCard, Mod, Mul, Overflow, ToCard],
ConvertUnsafe USING [SubString],
IO USING [EndOfStream, GetChar, GetIndex, PutChar, PutF, PutRope, SetIndex, STREAM, UnsafeGetBlock],
LiteralOps USING [Find, FindString],
Literals USING [LitClass, LTIndex],
MimData USING [idDCARD, idDINT],
MimP1 USING [Index, Token, Value, nullValue],
MimZones USING [permZone],
ParseTable USING [endMarker, HashIndex, HashTableRef, IndexTableRef, InitHashTable, InitIndexTable, InitScanTable, InitVocabulary, ScanTableRef, tokenARROW, tokenATOM, tokenBIND, tokenCHAR, tokenDOT, tokenDOTS, tokenEQUAL, tokenFLNUM, tokenGE, tokenGREATER, tokenID, tokenLE, tokenLESS, tokenLSTR, tokenMINUS, tokenNE, tokenNUM, tokenPOWER, tokenSTR, tokenTILDE, TSymbol, VocabularyRef],
RefText USING [Append],
Rope USING [ROPE],
SourceMarks USING [Reset, StartSource],
SymbolOps USING [EncodeCard, EnterString],
Symbols USING [nullName, Type],
Target: TYPE MachineParms USING [maxChar, maxLongWord, newLineChar],
Tree USING [Link],
TreeOps USING [GetNode, MakeNode, PushTree, SetInfo];
MimScanner: PROGRAM
IMPORTS ConstArith, IO, LiteralOps, MimData, MimZones, ParseTable, RefText, SourceMarks, SymbolOps, TreeOps
EXPORTS MimP1 = {
OPEN ParseTable;
funny constants
otherDollarChar: CHAR = '\244; -- ¤ in the Xerox Character Code standard
leftDoubleAngleChar: CHAR = '\253; -- « in the Xerox Character Code standard
assignChar: CHAR = '\254; -- ¬ in the Xerox Character Code standard
uparrowChar: CHAR = '\255; -- ­ in the Xerox Character Code standard
multiplyChar: CHAR = '\264; -- ´ in the Xerox Character Code standard
divideChar: CHAR = '\270; -- ¸ in the Xerox Character Code standard
rightDoubleAngleChar: CHAR = '\273; -- » in the Xerox Character Code standard
table installation
hashTab: HashTableRef ¬ NIL;
scanTab: ScanTableRef ¬ NIL;
vocab: VocabularyRef ¬ NIL;
vocabIndex: IndexTableRef ¬ NIL;
InstallScanTable: PUBLIC PROC = {
IF vocab = NIL THEN {
scanTab ¬ ParseTable.InitScanTable[];
hashTab ¬ ParseTable.InitHashTable[];
vocabIndex ¬ ParseTable.InitIndexTable[];
vocab ¬ ParseTable.InitVocabulary[];
endClass ¬ FindClass["END"];
beginClass ¬ FindClass["BEGIN"];
};
};
scanner state
stream: IO.STREAM ¬ NIL; -- the input stream
streamOrigin: MimP1.Index; -- FileStream.FileByteIndex
Logger: PROC [PROC [log: IO.STREAM]] ¬ NIL;
textChars: NAT = 4096;
TextBuffer: TYPE = PACKED ARRAY [0..textChars) OF CHAR;
tB: REF TextBuffer ¬ NIL;
tI: [0..textChars] ¬ 0;
tMax: [0..textChars] ¬ 0;
tOrigin: MimP1.Index ¬ 0;
tLimit: MimP1.Index ¬ 0;
tEnded: BOOL ¬ FALSE;
FillBuffer: PROC = {
tOrigin ¬ tLimit;
IF tEnded
THEN tMax ¬ 0
ELSE {
tMax ¬ stream.UnsafeGetBlock[[LOOPHOLE[tB], 0, textChars]].nBytesRead;
IF tMax < textChars THEN tEnded ¬ TRUE;
tLimit ¬ tOrigin + tMax;
};
IF tMax = 0 THEN {tB[0] ¬ '\000; tMax ¬ 1};
tI ¬ 0;
};
buffer: REF TEXT ¬ NIL;  -- token assembly area
iMax: CARDINAL ¬ 0;  -- iMax = buffer.maxLength
desc: ConvertUnsafe.SubString; -- initial buffer segment
nTokens: NAT ¬ 0;    -- token count
nErrors: NAT ¬ 0;    -- lexical errors
BogusLiteral: ERROR = CODE;
BufferOverflow: ERROR = CODE;
maxBufferSize: NAT = 30000;
ExpandBuffer: PROC = {
oldBuffer: REF TEXT ¬ buffer;
len: NAT ¬ oldBuffer.length;
SELECT len FROM
= maxBufferSize => ERROR BufferOverflow;
> maxBufferSize/2 => len ¬ maxBufferSize;
ENDCASE => len ¬ len + len;
buffer ¬ NEW[TEXT[len]];
desc.base ¬ LOOPHOLE[buffer, LONG POINTER];
buffer ¬ RefText.Append[to: buffer, from: oldBuffer];
iMax ¬ buffer.length ¬ buffer.maxLength;
MimZones.permZone.FREE[@oldBuffer];
};
char: CHAR ¬ 0C; -- current (most recently scanned) character
qDot: BOOL ¬ FALSE; -- used to resolved decimal point vs. interval
NextChar: PROC = {
also expanded inline within Atom
IF (tI¬tI+1) = tMax THEN FillBuffer[];
char ¬ tB[tI];
};
NextCharInline: PROC RETURNS [eof: BOOL] = INLINE {
IF (tI¬tI+1) = tMax THEN {IF tEnded THEN RETURN [TRUE]; FillBuffer[]};
char ¬ tB[tI];
RETURN [FALSE];
};
NextToken: PUBLIC PROC RETURNS [token: MimP1.Token] = {
mark: ATOM ¬ NIL;
DO
WHILE char IN ['\000..' ] DO
SELECT char FROM
'\000 => {
\000\000 is Tioga format for end-of-text
IF NextCharInline[] THEN GO TO EndFile;
IF char = '\000 THEN GO TO EndFile;
};
ENDCASE =>
IF NextCharInline[] THEN GO TO EndFile;
ENDLOOP;
token.index ¬ tOrigin + tI;
token.value ¬ MimP1.nullValue;
{
SELECT char FROM
'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm,
'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z => {
i: CARDINAL ¬ 0;
DO
buffer[i] ¬ char;
IF (tI¬tI+1) = tMax THEN FillBuffer[];
char ¬ tB[tI];
SELECT char FROM
IN ['a..'z], IN ['A..'Z], IN ['0..'9] =>
IF (i ¬ i+1) >= iMax THEN ExpandBuffer[];
ENDCASE => EXIT;
ENDLOOP;
desc.length ¬ i+1;
token.class ¬ tokenID;
token.value.r ¬ SymbolOps.EnterString[desc];
GO TO GotNext;
};
'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M,
'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => {
i: CARDINAL ¬ 0;
first, last: NAT ¬ char.ORD;
state: {uid, uidn, other} ¬ uid;
DO
buffer[i] ¬ char;
IF (tI¬tI+1) = tMax THEN FillBuffer[];
char ¬ tB[tI];
SELECT char FROM
IN ['A..'Z] => IF state = uidn THEN state ¬ other;
IN ['0..'9] => IF state = uid THEN state ¬ uidn;
IN ['a..'z] => state ¬ other;
ENDCASE => EXIT;
last ¬ char.ORD;
IF (i ¬ i+1) >= iMax THEN ExpandBuffer[];
ENDLOOP;
i ¬ i+1;
IF state # other THEN {
h: HashIndex ¬ ((first*128-first) + last) MOD HashIndex.LAST + 1;
j: CARDINAL;
WHILE (j ¬ hashTab[h].symbol) # 0 DO
s2: CARDINAL = vocabIndex[j-1];
IF vocabIndex[j]-s2 = i THEN
FOR s1: CARDINAL IN [0 .. i) DO
IF buffer[s1] # vocab.text[s2+s1] THEN EXIT;
REPEAT
FINISHED => {
token.class ¬ j;
SELECT j FROM
beginClass => mark ¬ $Begin;
endClass => mark ¬ $End;
ENDCASE;
GO TO GotNext;
};
ENDLOOP;
IF (h ¬ hashTab[h].link) = 0 THEN EXIT;
ENDLOOP;
};
desc.length ¬ i;
token.class ¬ tokenID;
token.value.r ¬ SymbolOps.EnterString[desc];
GO TO GotNext;
};
'0, '1, '2, '3, '4, '5, '6, '7, '8, '9 => {
{
ENABLE {
BogusLiteral => GO TO numberSyntax;
ConstArith.Overflow => GO TO numberOverflow;
};
[token.class, token.value] ¬ CollectNumber[i: 0, float: FALSE];
EXITS
numberSyntax => ScanError[$number, token.index];
numberOverflow => ScanError[$overflow, token.index];
};
GO TO GotNext;
};
': => {
NextChar[];
IF char = '=
THEN {token.class ¬ scanTab['←]; GO TO GetNext}
ELSE {token.class ¬ scanTab[':]; GO TO GotNext};
};
'{ => {
The start of a block or enumerated type
token.class ¬ scanTab[char];
mark ¬ $Begin;
GO TO GetNext;
};
'} => {
The end of a block or enumerated type
token.class ¬ scanTab[char];
mark ¬ $End;
GO TO GetNext;
};
'; => {
token.class ¬ scanTab[char];
mark ¬ $Semi;
GO TO GetNext;
};
'* => {
token.class ¬ scanTab[char];
NextChar[];
IF char = '* THEN {token.class ¬ tokenPOWER; GO TO GetNext};
GO TO GotNext;
};
',, ';, '#, '+, '/, '@, '!, '(, '), '[, '], '←, '^, '| => {
token.class ¬ scanTab[char];
GO TO GetNext;
};
'' => {
c: CHAR;
valid, advance: BOOL;
NextChar[];
[c, valid, advance] ¬ Escape[];
IF NOT valid THEN ScanError[$escape, token.index + 1];
token.class ¬ tokenCHAR;
token.value.r ¬ LiteralOps.Find[either, SymbolOps.EncodeCard[c.ORD]];
IF advance THEN GO TO GetNext ELSE GO TO GotNext;
};
'" => {
i: CARDINAL ¬ 0;
valid: BOOL;
advance: BOOL ¬ TRUE;
DO
IF advance THEN
IF NextCharInline[] THEN GO TO EOFEnd;
SELECT char FROM
'" => {
IF (tI¬tI+1) = tMax THEN FillBuffer[];
char ¬ tB[tI];
IF char # '" THEN GO TO QuoteEnd;
};
ENDCASE;
IF i >= iMax THEN ExpandBuffer[
! BufferOverflow => {ScanError[$string, token.index]; i ¬ 0; CONTINUE}];
[buffer[i], valid, advance] ¬ Escape[];
i ¬ i+1;
IF NOT valid THEN ScanError[$escape, tOrigin + tI];
REPEAT
QuoteEnd => NULL;
EOFEnd => {ScanError[$string, token.index]; FillBuffer[]; char ¬ tB[tI]};
ENDLOOP;
desc.length ¬ i;
token.value.r ¬ LiteralOps.FindString[desc];
SELECT char FROM
'L, 'l => {token.class ¬ tokenLSTR; GO TO GetNext};
'G, 'g => {token.class ¬ tokenSTR; GO TO GetNext};
ENDCASE => {token.class ¬ tokenSTR; GO TO GotNext};
};
'$, otherDollarChar => {
i: CARDINAL ¬ 0;
NextChar[];
token.class ¬ tokenATOM;
token.value.r ¬ Symbols.nullName;
SELECT char FROM
IN ['a..'z], IN ['A..'Z] => NULL;
ENDCASE => {
ScanError[$atom, token.index];
GO TO GotNext;
};
DO
SELECT char FROM
IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {
IF i >= iMax THEN ExpandBuffer[]; buffer[i] ¬ char; i ¬ i+1};
ENDCASE => EXIT;
NextChar[];
ENDLOOP;
desc.length ¬ i;
token.value.r ¬ SymbolOps.EnterString[desc];
GO TO GotNext;
};
'- => {
NextChar[];
IF char # '- THEN {token.class ¬ tokenMINUS; GO TO GotNext};
char ¬ '\000;
DO
pChar: CHAR = char;
IF NextCharInline[] THEN GO TO EndFile;
SELECT char FROM
'- => IF pChar = '- THEN EXIT;
Ascii.CR, Ascii.LF, '\n => EXIT;
ENDCASE;
ENDLOOP;
NextChar[];
};
'. => {
IF qDot THEN {
qDot ¬ FALSE;
token.index ¬ token.index-1;
token.class ¬ tokenDOTS;
GO TO GetNext;
};
NextChar[];
SELECT char FROM
'. => {token.class ¬ tokenDOTS; GO TO GetNext};
IN ['0..'9] => {
buffer[0] ¬ '.;
{
ENABLE {
BogusLiteral => GO TO numberSyntax;
ConstArith.Overflow => GO TO numberOverflow;
};
[token.class, token.value] ¬ CollectNumber[i: 1, float: TRUE];
EXITS
numberSyntax => ScanError[$number, token.index];
numberOverflow => ScanError[$overflow, token.index];
};
GO TO GotNext;
};
ENDCASE => {token.class ¬ tokenDOT; GO TO GotNext};
};
'= => {
NextChar[];
IF char = '>
THEN {token.class ¬ tokenARROW; GO TO GetNext}
ELSE {token.class ¬ tokenEQUAL; GO TO GotNext};
};
'< => {
NextChar[];
SELECT char FROM
'= => {token.class ¬ tokenLE; GO TO GetNext};
'< => GO TO ScanComment;
ENDCASE => {token.class ¬ tokenLESS; GO TO GotNext};
};
'> => {
NextChar[];
IF char = '=
THEN {token.class ¬ tokenGE; GO TO GetNext}
ELSE {token.class ¬ tokenGREATER; GO TO GotNext};
};
'~ => {
NextChar[];
SELECT char FROM
'= => {token.class ¬ tokenNE; GO TO GetNext};
'< => {token.class ¬ tokenGE; GO TO GetNext};
'> => {token.class ¬ tokenLE; GO TO GetNext};
'~ => {token.class ¬ tokenBIND; GO TO GetNext};
ENDCASE => {token.class ¬ tokenTILDE; GO TO GotNext};
};
leftDoubleAngleChar => GO TO ScanComment;
assignChar => {token.class ¬ scanTab['←]; GO TO GetNext};
uparrowChar => {token.class ¬ scanTab['^]; GO TO GetNext};
multiplyChar => {token.class ¬ scanTab['*]; GO TO GetNext};
divideChar => {token.class ¬ scanTab['/]; GO TO GetNext};
ENDCASE => {
token.class ¬ IF char < 200C THEN scanTab[char] ELSE 0;
IF token.class # 0 THEN GO TO GetNext;
NextChar[];
ScanError[$char, token.index];
};
EXITS ScanComment => {
state: {plain, leftBrocket, rightBrocket} ¬ plain;
nest: CARDINAL ¬ 1;
DO
IF NextCharInline[] THEN GO TO EndFile;
SELECT char FROM
'> => SELECT state FROM
plain, leftBrocket => state ¬ rightBrocket;
rightBrocket => {state ¬ plain; IF (nest ¬ nest - 1) = 0 THEN EXIT};
ENDCASE;
'< => SELECT state FROM
plain, rightBrocket => state ¬ leftBrocket;
leftBrocket => {state ¬ plain; nest ¬ nest + 1};
ENDCASE;
leftDoubleAngleChar => {state ¬ plain; nest ¬ nest + 1};
rightDoubleAngleChar => {state ¬ plain; IF (nest ¬ nest - 1) = 0 THEN EXIT};
ENDCASE => state ¬ plain;
ENDLOOP;
NextChar[];
};
};
REPEAT
GetNext => {IF (tI¬tI+1) = tMax THEN FillBuffer[]; char ¬ tB[tI]};
GotNext => {};
EndFile => {
token.class ¬ endMarker;
token.index ¬ tOrigin + (tI-1);
token.value ¬ MimP1.nullValue;
UNTIL tEnded DO FillBuffer[] ENDLOOP; -- flush stream
FillBuffer[];
char ¬ tB[tI];
};
ENDLOOP;
IF token.class # endMarker THEN
SourceMarks.StartSource[token.index, mark];
nTokens ¬ nTokens + 1;
};
numerical conversion
LongLit: TYPE = ConstArith.Const;
endMark: CHAR = '\000;
CollectNumber: PROC
[i: CARDINAL, float: BOOL] RETURNS [class: TSymbol, value: MimP1.Value] = {
hexCount: NAT ¬ 0;
hexSig: PACKED ARRAY CHAR['a..'h] OF {F, T} ¬ ALL[F];
v: LongLit ¬ const0;
Accept: PROC = INLINE {
buffer[i] ¬ char;
IF (i ¬ i+1) >= iMax THEN ExpandBuffer[];
NextChar[];
};
class ¬ tokenNUM;
DO
SELECT char FROM
IN ['0..'9] => Accept[];
'e, 'E => {
hexSig['e] ¬ T;
hexCount ¬ hexCount + 1;
Accept[];
IF hexCount = 1 AND char = '+ OR char = '- THEN {float ¬ TRUE; Accept[]};
};
IN ['a..'f] => {hexSig[char] ¬ T; hexCount ¬ hexCount+1; Accept[]};
IN ['A..'F] => {
hexSig[char+('a-'A)] ¬ T;
hexCount ¬ hexCount+1;
Accept[];
};
'h, 'H => {
hexSig['h] ¬ T;
hexCount ¬ hexCount+1;
Accept[];
};
'. => {
IF hexCount # 0 OR float THEN EXIT;
NextChar[];
IF char = '. THEN {qDot ¬ TRUE; EXIT};
float ¬ TRUE;
buffer[i] ¬ '.;
IF (i ¬ i+1) >= iMax THEN ExpandBuffer[];
};
ENDCASE => EXIT;
ENDLOOP;
buffer[i] ¬ endMark;
SELECT TRUE FROM
float => GO TO floatExit;
(hexSig['h] = T) => v ¬ ScanHex[buffer];
hexCount = 0 => v ¬ ScanDecimal[buffer];
hexCount # 1 => v ¬ ScanHex[buffer];
ENDCASE =>
SELECT hexSig FROM
[F,T,F,F,F,F,F,F] => v ¬ ScanOctal[buffer];
[F,F,T,F,F,F,F,F] => class ¬ tokenCHAR;
[F,F,F,T,F,F,F,F] => v ¬ ScanDecimal[buffer];
[F,F,F,F,T,F,F,F] => GO TO floatExit;
ENDCASE => v ¬ ScanHex[buffer];
SELECT class FROM
tokenCHAR => {
i: CARDINAL ¬ 0;
c: CHAR ¬ buffer[i];
val: CARDINAL ¬ 0;
over: BOOL ¬ FALSE;
IF c NOT IN ['0..'7] THEN GO TO bogus;
WHILE (c ¬ buffer[i]) IN ['0..'7] DO
d: [0..7] = Digit[c];
val ¬ val*8 + d;
i ¬ i+1;
IF val > Target.maxChar THEN over ¬ TRUE;
ENDLOOP;
IF c = 'c OR c = 'C THEN c ¬ buffer[i¬i+1] ELSE GO TO bogus;
IF over THEN ERROR ConstArith.Overflow;
IF c # endMark THEN GO TO bogus;
value ¬ [ref[LiteralOps.Find[either, SymbolOps.EncodeCard[val]]]];
EXITS bogus => ERROR BogusLiteral;
};
ENDCASE => {
lastLongInt: CARD = Target.maxLongWord / 2;
IF ConstArith.Compare[v, constSplit] # less
THEN {
hiC: CARD = ConstArith.ToCard[ConstArith.Div[v, constSplit]];
hiV: Literals.LTIndex = LiteralOps.Find[unsigned, SymbolOps.EncodeCard[hiC]];
hiT: Tree.Link = [literal[hiV]];
loC: CARD = ConstArith.ToCard[ConstArith.Mod[v, constSplit]];
loV: Literals.LTIndex = LiteralOps.Find[unsigned, SymbolOps.EncodeCard[loC]];
loT: Tree.Link = [literal[loV]];
resT: Tree.Link;
type: Symbols.Type = IF hiC > lastLongInt
THEN MimData.idDCARD
ELSE MimData.idDINT;
TreeOps.PushTree[hiT];
TreeOps.PushTree[loT];
TreeOps.SetInfo[LOOPHOLE[type]];
resT ¬ TreeOps.MakeNode[mwconst, 2];
value ¬ [ref[TreeOps.GetNode[resT]]];
}
ELSE {
litClass: Literals.LitClass ¬ either;
card: CARD ¬ ConstArith.ToCard[v];
IF card > lastLongInt THEN litClass ¬ unsigned;
value ¬ [ref[LiteralOps.Find[litClass, SymbolOps.EncodeCard[card]]]];
};
};
EXITS floatExit => {
class ¬ tokenFLNUM;
desc.length ¬ i;
value.r ¬ LOOPHOLE[LiteralOps.FindString[desc]];
};
};
Digit: ARRAY CHAR ['0..'9] OF [0..9] = [0,1,2,3,4,5,6,7,8,9];
HexDigit: ARRAY CHAR ['A..'F] OF [0..15] = [10,11,12,13,14,15];
the following 5 procs assume that the host CARD is good for at least 32 bits
const0: LongLit = ConstArith.FromCard[0];
const8: LongLit = ConstArith.FromCard[8];
const10: LongLit = ConstArith.FromCard[10];
const16: LongLit = ConstArith.FromCard[16];
constSplit: LongLit = ConstArith.Add[
ConstArith.FromCard[CARD.LAST], ConstArith.FromCard[1]];
AppendToScale: PROC [v: CARDINAL, digit: CHAR ['0..'9]] RETURNS [CARDINAL] = {
d: [0..9] = Digit[digit];
next: CARDINAL = v*10 + d;
IF next < v THEN ERROR ConstArith.Overflow;
RETURN [next];
};
ScanDecimal: PROC [s: REF TEXT] RETURNS [LongLit] = {
v: LongLit ¬ const0;
{
i: CARDINAL ¬ 0;
c: CHAR ¬ s[i];
IF c NOT IN ['0..'9] THEN GO TO bogus;
DO
d: [0..9] = Digit[c];
v ¬ ConstArith.Add[ConstArith.Mul[const10, v], ConstArith.FromCard[d]];
i ¬ i+1;
c ¬ s[i];
IF c NOT IN ['0..'9] THEN EXIT;
ENDLOOP;
SELECT c FROM
'd, 'D => {
scale: CARDINAL ¬ 0;
DO
i ¬ i+1;
c ¬ s[i];
IF c NOT IN ['0..'9] THEN EXIT;
scale ¬ AppendToScale[scale, c];
ENDLOOP;
THROUGH [1 .. scale] DO v ¬ ConstArith.Mul[const10, v]; ENDLOOP;
};
ENDCASE;
IF c # endMark THEN GO TO bogus;
EXITS bogus => ERROR BogusLiteral;
};
RETURN [v];
};
ScanOctal: PROC [s: REF TEXT] RETURNS [LongLit] = {
v: LongLit ¬ const0;
{
i: CARDINAL ¬ 0;
c: CHAR ¬ s[i];
IF c NOT IN ['0..'7] THEN GO TO bogus;
DO
d: [0..7] = Digit[c];
v ¬ ConstArith.Add[ConstArith.Mul[const8, v], ConstArith.FromCard[d]];
i ¬ i+1;
c ¬ s[i];
IF c NOT IN ['0..'7] THEN EXIT;
ENDLOOP;
SELECT c FROM
'b, 'B => {
scale: CARDINAL ¬ 0;
DO
i ¬ i+1;
c ¬ s[i];
IF c NOT IN ['0..'9] THEN EXIT;
scale ¬ AppendToScale[scale, c];
ENDLOOP;
THROUGH [1 .. scale] DO v ¬ ConstArith.Mul[const8, v]; ENDLOOP
};
ENDCASE;
IF c # endMark THEN GO TO bogus;
EXITS bogus => ERROR BogusLiteral;
};
RETURN [v];
};
ScanHex: PROC [s: REF TEXT] RETURNS [LongLit] = {
v: LongLit ¬ const0;
{
i: CARDINAL ¬ 0;
c: CHAR ¬ s[i];
IF c NOT IN ['0..'9] THEN GO TO bogus;
DO
d: [0..15] ¬ 0;
SELECT c FROM
IN ['0..'9] => d ¬ Digit[c];
IN ['A..'F] => d ¬ HexDigit[c];
IN ['a..'f] => d ¬ HexDigit[VAL[(c.ORD-'a.ORD)+'A.ORD]];
ENDCASE => EXIT;
v ¬ ConstArith.Add[ConstArith.Mul[const16, v], ConstArith.FromCard[d]];
i ¬ i + 1;
c ¬ s[i];
ENDLOOP;
IF c = 'h OR c = 'H THEN {
scale: CARDINAL ¬ 0;
WHILE (c ¬ s[i¬i+1]) IN ['0..'9] DO scale ¬ AppendToScale[scale, c]; ENDLOOP;
THROUGH [1 .. scale] DO v ¬ ConstArith.Mul[const16, v]; ENDLOOP;
};
IF c # endMark THEN GO TO bogus;
EXITS bogus => ERROR BogusLiteral;
};
RETURN [v];
};
character and string constants
escapeMark: CHAR = '\\;
Escape: PROC RETURNS [c: CHAR, valid, advance: BOOL¬TRUE] = {
c ¬ char;
IF c = escapeMark THEN {
NextChar[];
SELECT char FROM
'n, 'N => c ¬ Target.newLineChar;
'r, 'R => c ¬ Ascii.CR;
'l, 'L => c ¬ Ascii.LF;
't, 'T => c ¬ Ascii.TAB;
'b, 'B => c ¬ Ascii.BS;
'f, 'F => c ¬ Ascii.FF;
'', '", escapeMark => c ¬ char;
IN ['0 .. '7] => {
nc, v: CARDINAL ¬ 0;
DO
IF NOT (char IN ['0..'7]) THEN {valid ¬ advance ¬ FALSE; EXIT};
v ¬ 8*v + Digit[char];
IF (nc ¬ nc+1) = 3 THEN EXIT;
NextChar[];
ENDLOOP;
IF v > Target.maxChar THEN {valid ¬ FALSE; v ¬ 0};
c ¬ v + 0c;
};
ENDCASE => valid ¬ advance ¬ FALSE
};
};
initialization/finalization
ScanInit: PUBLIC PROC [source: IO.STREAM, logger: PROC [PROC [log: IO.STREAM]]] = {
stream ¬ source;
Logger ¬ logger;
SourceMarks.Reset[];
IF buffer = NIL OR buffer.length # 256 THEN buffer ¬ MimZones.permZone.NEW[TEXT[256]];
desc.base ¬ LOOPHOLE[buffer, LONG POINTER]; desc.offset ¬ 0;
iMax ¬ buffer.length ¬ buffer.maxLength;
streamOrigin ¬ stream.GetIndex[];
tB ¬ MimZones.permZone.NEW[TextBuffer];
tOrigin ¬ tLimit ¬ 0;
tMax ¬ 0;
tEnded ¬ qDot ¬ FALSE;
FillBuffer[];
char ¬ tB[tI];
nTokens ¬ nErrors ¬ 0;
};
ScanStats: PUBLIC PROC RETURNS [NAT, NAT] = {
RETURN [nTokens, nErrors];
};
ScanReset: PUBLIC PROC = {
MimZones.permZone.FREE[@buffer];
IF tB # NIL THEN MimZones.permZone.FREE[@tB];
stream ¬ NIL;
Logger ¬ NIL;
};
error handling
ResetScanIndex: PUBLIC PROC [index: MimP1.Index] RETURNS [success: BOOL] = {
IF NOT (index IN [tOrigin .. tLimit)) THEN {
page: CARDINAL = index/textChars;
tOrigin ¬ tLimit ¬ page*textChars;
tMax ¬ 0;
tEnded ¬ FALSE;
stream.SetIndex[streamOrigin + tOrigin];
FillBuffer[];
};
tI ¬ index - tOrigin;
IF tI >= tMax THEN FillBuffer[];
char ¬ tB[tI];
RETURN [TRUE];
};
ErrorCode: TYPE = {overflow, number, string, char, atom, escape};
ScanError: PROC [code: ErrorCode, tokenIndex: MimP1.Index] = {
Inner: PROC [log: IO.STREAM] = {
ErrorContext[log,
SELECT code FROM
$overflow => "number too large",
$number => "invalid number",
$string => "string unterminated or too long",
$char => "invalid character",
$atom => "invalid atom",
$escape => "invalid escape sequence",
ENDCASE => NIL,
tokenIndex];
log.PutChar['\n];
};
nErrors ¬ nErrors + 1;
Logger[Inner];
};
ErrorContext: PUBLIC PROC [to: IO.STREAM, message: Rope.ROPE, tokenIndex: MimP1.Index] = {
RRA: make sure that this routine essentially matches MimosaLogImpl.PrintTextLine
saveIndex: MimP1.Index = IO.GetIndex[stream];
origin: MimP1.Index = streamOrigin + tokenIndex;
start: MimP1.Index ¬ origin;
lineIndex: MimP1.Index ¬ origin;
shown: BOOL ¬ FALSE;
FOR n: [1..100] IN [1..100] UNTIL lineIndex = 0 DO
lineIndex ¬ lineIndex - 1;
IO.SetIndex[stream, lineIndex];
SELECT IO.GetChar[stream] FROM
Ascii.CR, Ascii.LF, '\n => EXIT;
ENDCASE => start ¬ lineIndex;
ENDLOOP;
IO.SetIndex[stream, start];
FOR n: [1..100] IN [1..100] DO
char: CHAR ¬ IO.GetChar[stream ! IO.EndOfStream => EXIT];
IF start+n > origin THEN
IF NOT shown THEN {
This marks the spot where the error is.
IO.PutRope[to, " <<!!>> "];
shown ¬ TRUE;
};
SELECT char FROM
Ascii.CR, Ascii.LF, '\n => EXIT;
ENDCASE => IO.PutChar[to, char];
ENDLOOP;
IO.PutF[to, "\n[%d] %g\n", [integer[origin]], [rope[message]]];
IO.SetIndex[stream, saveIndex];
};
Block source stuff
FindClass: PROC [string: STRING] RETURNS [CARDINAL] = {
len: CARDINAL = string.length;
firstChar: NAT ¬ string[0].ORD;
lastChar: NAT ¬ string[len-1].ORD;
h: HashIndex ¬ ((firstChar*128-firstChar) + lastChar) MOD HashIndex.LAST + 1;
j: CARDINAL;
WHILE (j ¬ hashTab[h].symbol) # 0 DO
s2: CARDINAL ¬ vocabIndex[j-1];
IF vocabIndex[j]-s2 = len THEN
FOR s1: CARDINAL IN [0 .. len) DO
IF string[s1] # vocab.text[s2+s1] THEN EXIT;
REPEAT
FINISHED => RETURN [j];
ENDLOOP;
IF (h ¬ hashTab[h].link) = 0 THEN EXIT;
ENDLOOP;
ERROR;
};
endClass: CARDINAL;
beginClass: CARDINAL;
}.