[_CDCSL_93-16_]<1>Cedar>release>MimosaOnly>MimScanner.mesa

-- MimScanner.mesa

-- Satterthwaite, June 17, 1986 2:26:36 pm PDT

-- Russ Atkinson (RRA) January 4, 1991 1:45 am PST

DIRECTORY

Ascii USING [BS, CR, FF, LF, TAB],

ConstArith USING [Add, Compare, Const, Div, FromCard, Mod, Mul, Overflow, ToCard],

ConvertUnsafe USING [SubString],

IO USING [EndOfStream, GetChar, GetIndex, PutChar, PutF, PutRope, SetIndex, STREAM, UnsafeGetBlock],

LiteralOps USING [Find, FindString],

Literals USING [LitClass, LTIndex],

MimData USING [idDCARD, idDINT],

MimP1 USING [Index, Token, Value, nullValue],

MimZones USING [permZone],

ParseTable USING [endMarker, HashIndex, HashTableRef, IndexTableRef, InitHashTable, InitIndexTable, InitScanTable, InitVocabulary, ScanTableRef, tokenARROW, tokenATOM, tokenBIND, tokenCHAR, tokenDOT, tokenDOTS, tokenEQUAL, tokenFLNUM, tokenGE, tokenGREATER, tokenID, tokenLE, tokenLESS, tokenLSTR, tokenMINUS, tokenNE, tokenNUM, tokenPOWER, tokenSTR, tokenTILDE, TSymbol, VocabularyRef],

RefText USING [Append],

Rope USING [ROPE],

SourceMarks USING [Reset, StartSource],

SymbolOps USING [EncodeCard, EnterString],

Symbols USING [nullName, Type],

Target: TYPE MachineParms USING [maxChar, maxLongWord, newLineChar],

Tree USING [Link],

TreeOps USING [GetNode, MakeNode, PushTree, SetInfo];

MimScanner: PROGRAM

IMPORTS ConstArith, IO, LiteralOps, MimData, MimZones, ParseTable, RefText, SourceMarks, SymbolOps, TreeOps

EXPORTS MimP1 = {

OPEN ParseTable;

-- funny constants

-- Characters above 177B that NextToken recognizes as alternate spellings of ASCII punctuation.
-- The octal values are code positions in the Xerox Character Code standard.
otherDollarChar: CHAR = '\244; -- alternate form of '$ (starts an atom) in the Xerox Character Code standard

leftDoubleAngleChar: CHAR = '\253; -- left double angle bracket (opens a comment, like "<<") in the Xerox Character Code standard

assignChar: CHAR = '\254; -- left arrow (assignment) in the Xerox Character Code standard

uparrowChar: CHAR = '\255; -- up arrow (scanned like '^) in the Xerox Character Code standard

multiplyChar: CHAR = '\264; -- multiplication sign (scanned like '*) in the Xerox Character Code standard

divideChar: CHAR = '\270; -- division sign (scanned like '/) in the Xerox Character Code standard

rightDoubleAngleChar: CHAR = '\273; -- right double angle bracket (closes a comment, like ">>") in the Xerox Character Code standard

-- table installation

-- Parse-table references; each stays NIL until InstallScanTable fills it in.
hashTab: HashTableRef Ź NIL; -- reserved-word hash chains (fields symbol and link are used)

scanTab: ScanTableRef Ź NIL; -- maps a character to its token class (0 means none)

vocab: VocabularyRef Ź NIL; -- vocab.text holds the spellings of the reserved words

vocabIndex: IndexTableRef Ź NIL; -- entry j-1 is the start and entry j the end of word j in vocab.text

InstallScanTable: PUBLIC PROC = {
  -- Loads the scanner's parse tables on the first call; later calls do nothing.
  -- vocab serves as the "already installed" test, so it is the last table assigned.
  IF vocab = NIL THEN {
    scanTab Ź ParseTable.InitScanTable[];
    hashTab Ź ParseTable.InitHashTable[];
    vocabIndex Ź ParseTable.InitIndexTable[];
    vocab Ź ParseTable.InitVocabulary[];
    -- FindClass reads hashTab, vocabIndex and vocab, so it must follow their installation.
    endClass Ź FindClass["END"];
    beginClass Ź FindClass["BEGIN"];
    };
  };

-- scanner state

stream: IO.STREAM Ź NIL; -- the input stream

streamOrigin: MimP1.Index; -- FileStream.FileByteIndex (stream position recorded by ScanInit)

Logger: PROC [PROC [log: IO.STREAM]] Ź NIL; -- supplied to ScanInit; ScanError hands it the routine that writes the message

textChars: NAT = 4096; -- size of the input buffer, in characters

TextBuffer: TYPE = PACKED ARRAY [0..textChars) OF CHAR;

tB: REF TextBuffer Ź NIL; -- the input buffer, allocated by ScanInit

tI: [0..textChars] Ź 0; -- index in tB of the current character

tMax: [0..textChars] Ź 0; -- number of valid characters in tB (FillBuffer never leaves it 0)

tOrigin: MimP1.Index Ź 0; -- source index corresponding to tB[0]

tLimit: MimP1.Index Ź 0; -- source index just past the characters read so far

tEnded: BOOL Ź FALSE; -- set by FillBuffer once a read returns fewer than textChars characters

FillBuffer: PROC = {
  -- Loads the next block of source text into tB and resets tI to its start.
  -- After the stream is exhausted the buffer holds a single '\000 so that callers
  -- always have a character to look at.
  tOrigin Ź tLimit;  -- the new block begins where the previous one stopped
  IF NOT tEnded
    THEN {
      tMax Ź stream.UnsafeGetBlock[[LOOPHOLE[tB], 0, textChars]].nBytesRead;
      IF tMax < textChars THEN tEnded Ź TRUE;  -- a short read marks the final block
      tLimit Ź tOrigin + tMax;
      }
    ELSE tMax Ź 0;
  IF tMax = 0 THEN {
    tB[0] Ź '\000;
    tMax Ź 1;
    };
  tI Ź 0;
  };

buffer: REF TEXT Ź NIL; -- token assembly area

iMax: CARDINAL Ź 0; -- iMax = buffer.maxLength

desc: ConvertUnsafe.SubString; -- initial buffer segment (base points at buffer; length is set per token)

nTokens: NAT Ź 0; -- token count

nErrors: NAT Ź 0; -- lexical errors

BogusLiteral: ERROR = CODE; -- raised by the numeric scanners for a malformed literal

BufferOverflow: ERROR = CODE; -- raised by ExpandBuffer when buffer is already at maxBufferSize

maxBufferSize: NAT = 30000; -- upper bound on the size of buffer

ExpandBuffer: PROC = {
  -- Replaces buffer with a larger copy (double the size, capped at maxBufferSize)
  -- and updates desc.base and iMax to match.
  -- Raises BufferOverflow, before changing any state, when buffer cannot grow further.
  old: REF TEXT Ź buffer;
  newSize: NAT Ź old.length;
  SELECT newSize FROM
    >= maxBufferSize => ERROR BufferOverflow;
    > maxBufferSize/2 => newSize Ź maxBufferSize;
    ENDCASE => newSize Ź newSize + newSize;
  -- Allocate from the same zone that ScanInit uses and that FREE below returns storage to.
  buffer Ź MimZones.permZone.NEW[TEXT[newSize]];
  buffer Ź RefText.Append[to: buffer, from: old];
  -- Capture the base address only after Append has returned the final REF.
  desc.base Ź LOOPHOLE[buffer, LONG POINTER];
  -- The scanner indexes buffer directly, so its whole capacity is marked as in use.
  iMax Ź buffer.length Ź buffer.maxLength;
  MimZones.permZone.FREE[@old];
  };

char: CHAR Ź 0C; -- current (most recently scanned) character

qDot: BOOL Ź FALSE; -- used to resolve decimal point vs. interval: set by CollectNumber when a number is followed by ".."

NextChar: PROC = {
  -- Steps to the next source character, refilling tB when the current block is used up.
  -- The same steps are also written out inline at several places in NextToken.
  tI Ź tI + 1;
  IF tI = tMax THEN FillBuffer[];
  char Ź tB[tI];
  };

NextCharInline: PROC RETURNS [eof: BOOL] = INLINE {
  -- Like NextChar, but reports end of input instead of refilling once the last block is used up.
  -- Returns TRUE (leaving char unchanged) at end of input, FALSE after loading the next character.
  tI Ź tI + 1;
  IF tI = tMax THEN {
    IF tEnded THEN RETURN [TRUE];
    FillBuffer[];
    };
  char Ź tB[tI];
  RETURN [FALSE];
  };

NextToken: PUBLIC PROC RETURNS [token: MimP1.Token] = {
  -- Scans and returns the next token, starting at the current character.
  --   token.class: the token's class (endMarker at end of input)
  --   token.index: source index of the token's first character
  --   token.value: literal or name value where one applies, else MimP1.nullValue
  -- Characters up to and including space are skipped, as are "--" and "<<...>>" comments.
  -- Lexical errors are reported through ScanError.
  -- For every token except endMarker, SourceMarks.StartSource is told the token's index
  -- together with a mark ($Begin, $End, $Semi or NIL).
  mark: ATOM Ź NIL;
  DO
    -- Skip blanks and control characters.
    WHILE char IN ['\000..' ] DO
      SELECT char FROM
        '\000 => {
          -- \000\000 is Tioga format for end-of-text
          IF NextCharInline[] THEN GO TO EndFile;
          IF char = '\000 THEN GO TO EndFile;
          };
        ENDCASE =>
          IF NextCharInline[] THEN GO TO EndFile;
      ENDLOOP;
    token.index Ź tOrigin + tI;
    token.value Ź MimP1.nullValue;
    {
    SELECT char FROM

      'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm,
      'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z => {
        -- An identifier that starts in lower case is never looked up as a reserved word.
        i: CARDINAL Ź 0;
        DO
          buffer[i] Ź char;
          IF (tIŹtI+1) = tMax THEN FillBuffer[];
          char Ź tB[tI];
          SELECT char FROM
            IN ['a..'z], IN ['A..'Z], IN ['0..'9] =>
              IF (i Ź i+1) >= iMax THEN ExpandBuffer[];
            ENDCASE => EXIT;
          ENDLOOP;
        desc.length Ź i+1;
        token.class Ź tokenID;
        token.value.r Ź SymbolOps.EnterString[desc];
        GO TO GotNext;
        };

      'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M,
      'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => {
        -- Identifier or reserved word.  state tracks whether the spelling can still be
        -- a reserved word: uid = upper case letters only, uidn = upper case letters
        -- followed by digits, other = anything else.
        i: CARDINAL Ź 0;
        first, last: NAT Ź char.ORD;
        state: {uid, uidn, other} Ź uid;
        DO
          buffer[i] Ź char;
          IF (tIŹtI+1) = tMax THEN FillBuffer[];
          char Ź tB[tI];
          SELECT char FROM
            IN ['A..'Z] => IF state = uidn THEN state Ź other;
            IN ['0..'9] => IF state = uid THEN state Ź uidn;
            IN ['a..'z] => state Ź other;
            ENDCASE => EXIT;
          last Ź char.ORD;
          IF (i Ź i+1) >= iMax THEN ExpandBuffer[];
          ENDLOOP;
        i Ź i+1;  -- i is now the length of the spelling
        IF state # other THEN {
          -- Look the spelling up in the reserved-word table (same hash as FindClass).
          h: HashIndex Ź ((first*128-first) + last) MOD HashIndex.LAST + 1;
          j: CARDINAL;
          WHILE (j Ź hashTab[h].symbol) # 0 DO
            s2: CARDINAL = vocabIndex[j-1];
            IF vocabIndex[j]-s2 = i THEN
              FOR s1: CARDINAL IN [0 .. i) DO
                IF buffer[s1] # vocab.text[s2+s1] THEN EXIT;
                REPEAT
                  FINISHED => {
                    token.class Ź j;
                    SELECT j FROM
                      beginClass => mark Ź $Begin;
                      endClass => mark Ź $End;
                      ENDCASE;
                    GO TO GotNext;
                    };
                ENDLOOP;
            IF (h Ź hashTab[h].link) = 0 THEN EXIT;
            ENDLOOP;
          };
        desc.length Ź i;
        token.class Ź tokenID;
        token.value.r Ź SymbolOps.EnterString[desc];
        GO TO GotNext;
        };

      '0, '1, '2, '3, '4, '5, '6, '7, '8, '9 => {
        {
        ENABLE {
          BogusLiteral => GO TO numberSyntax;
          ConstArith.Overflow => GO TO numberOverflow;
          };
        [token.class, token.value] Ź CollectNumber[i: 0, float: FALSE];
        EXITS
          numberSyntax => ScanError[$number, token.index];
          numberOverflow => ScanError[$overflow, token.index];
        };
        GO TO GotNext;
        };

      ': => {
        NextChar[];
        IF char = '=
          THEN {token.class Ź scanTab['←]; GO TO GetNext}
          ELSE {token.class Ź scanTab[':]; GO TO GotNext};
        };

      '{ => {
        -- The start of a block or enumerated type
        token.class Ź scanTab[char];
        mark Ź $Begin;
        GO TO GetNext;
        };

      '} => {
        -- The end of a block or enumerated type
        token.class Ź scanTab[char];
        mark Ź $End;
        GO TO GetNext;
        };

      '; => {
        token.class Ź scanTab[char];
        mark Ź $Semi;
        GO TO GetNext;
        };

      '* => {
        token.class Ź scanTab[char];
        NextChar[];
        IF char = '* THEN {token.class Ź tokenPOWER; GO TO GetNext};
        GO TO GotNext;
        };

      -- ('; is not repeated here: the arm above already handles it.)
      ',, '#, '+, '/, '@, '!, '(, '), '[, '], '←, '^, '| => {
        token.class Ź scanTab[char];
        GO TO GetNext;
        };

      '' => {
        -- Character literal.
        c: CHAR;
        valid, advance: BOOL;
        NextChar[];
        [c, valid, advance] Ź Escape[];
        IF NOT valid THEN ScanError[$escape, token.index + 1];
        token.class Ź tokenCHAR;
        token.value.r Ź LiteralOps.Find[either, SymbolOps.EncodeCard[c.ORD]];
        IF advance THEN GO TO GetNext ELSE GO TO GotNext;
        };

      '" => {
        -- String literal; a doubled quote inside the string stands for one quote.
        i: CARDINAL Ź 0;
        valid: BOOL;
        advance: BOOL Ź TRUE;
        DO
          IF advance THEN
            IF NextCharInline[] THEN GO TO EOFEnd;
          SELECT char FROM
            '" => {
              IF (tIŹtI+1) = tMax THEN FillBuffer[];
              char Ź tB[tI];
              IF char # '" THEN GO TO QuoteEnd;
              };
            ENDCASE;
          -- If the string is too long, report it once and keep scanning from the buffer start.
          IF i >= iMax THEN ExpandBuffer[
            ! BufferOverflow => {ScanError[$string, token.index]; i Ź 0; CONTINUE}];
          [buffer[i], valid, advance] Ź Escape[];
          i Ź i+1;
          IF NOT valid THEN ScanError[$escape, tOrigin + tI];
          REPEAT
            QuoteEnd => NULL;
            EOFEnd => {ScanError[$string, token.index]; FillBuffer[]; char Ź tB[tI]};
          ENDLOOP;
        desc.length Ź i;
        token.value.r Ź LiteralOps.FindString[desc];
        -- An optional L or G suffix selects the string token class.
        SELECT char FROM
          'L, 'l => {token.class Ź tokenLSTR; GO TO GetNext};
          'G, 'g => {token.class Ź tokenSTR; GO TO GetNext};
          ENDCASE => {token.class Ź tokenSTR; GO TO GotNext};
        };

      '$, otherDollarChar => {
        -- Atom literal: the marker followed by a letter and then letters or digits.
        i: CARDINAL Ź 0;
        NextChar[];
        token.class Ź tokenATOM;
        token.value.r Ź Symbols.nullName;
        SELECT char FROM
          IN ['a..'z], IN ['A..'Z] => NULL;
          ENDCASE => {
            ScanError[$atom, token.index];
            GO TO GotNext;
            };
        DO
          SELECT char FROM
            IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {
              IF i >= iMax THEN ExpandBuffer[]; buffer[i] Ź char; i Ź i+1};
            ENDCASE => EXIT;
          NextChar[];
          ENDLOOP;
        desc.length Ź i;
        token.value.r Ź SymbolOps.EnterString[desc];
        GO TO GotNext;
        };

      '- => {
        NextChar[];
        IF char # '- THEN {token.class Ź tokenMINUS; GO TO GotNext};
        -- A "--" comment runs to the next "--" or to the end of the line.
        char Ź '\000;
        DO
          pChar: CHAR = char;
          IF NextCharInline[] THEN GO TO EndFile;
          SELECT char FROM
            '- => IF pChar = '- THEN EXIT;
            Ascii.CR, Ascii.LF, '\n => EXIT;
            ENDCASE;
          ENDLOOP;
        NextChar[];
        -- no token was produced: go around the outer loop again
        };

      '. => {
        IF qDot THEN {
          -- CollectNumber already consumed the first dot of a ".." that followed a number.
          qDot Ź FALSE;
          token.index Ź token.index-1;
          token.class Ź tokenDOTS;
          GO TO GetNext;
          };
        NextChar[];
        SELECT char FROM
          '. => {token.class Ź tokenDOTS; GO TO GetNext};
          IN ['0..'9] => {
            -- a number that starts with a decimal point
            buffer[0] Ź '.;
            {
            ENABLE {
              BogusLiteral => GO TO numberSyntax;
              ConstArith.Overflow => GO TO numberOverflow;
              };
            [token.class, token.value] Ź CollectNumber[i: 1, float: TRUE];
            EXITS
              numberSyntax => ScanError[$number, token.index];
              numberOverflow => ScanError[$overflow, token.index];
            };
            GO TO GotNext;
            };
          ENDCASE => {token.class Ź tokenDOT; GO TO GotNext};
        };

      '= => {
        NextChar[];
        IF char = '>
          THEN {token.class Ź tokenARROW; GO TO GetNext}
          ELSE {token.class Ź tokenEQUAL; GO TO GotNext};
        };

      '< => {
        NextChar[];
        SELECT char FROM
          '= => {token.class Ź tokenLE; GO TO GetNext};
          '< => GO TO ScanComment;
          ENDCASE => {token.class Ź tokenLESS; GO TO GotNext};
        };

      '> => {
        NextChar[];
        IF char = '=
          THEN {token.class Ź tokenGE; GO TO GetNext}
          ELSE {token.class Ź tokenGREATER; GO TO GotNext};
        };

      '~ => {
        NextChar[];
        SELECT char FROM
          '= => {token.class Ź tokenNE; GO TO GetNext};
          '< => {token.class Ź tokenGE; GO TO GetNext};
          '> => {token.class Ź tokenLE; GO TO GetNext};
          '~ => {token.class Ź tokenBIND; GO TO GetNext};
          ENDCASE => {token.class Ź tokenTILDE; GO TO GotNext};
        };

      leftDoubleAngleChar => GO TO ScanComment;

      assignChar => {token.class Ź scanTab['←]; GO TO GetNext};

      uparrowChar => {token.class Ź scanTab['^]; GO TO GetNext};

      multiplyChar => {token.class Ź scanTab['*]; GO TO GetNext};

      divideChar => {token.class Ź scanTab['/]; GO TO GetNext};

      ENDCASE => {
        -- Anything else: use the scan table for codes below 200C; class 0 means invalid.
        token.class Ź IF char < 200C THEN scanTab[char] ELSE 0;
        IF token.class # 0 THEN GO TO GetNext;
        NextChar[];
        ScanError[$char, token.index];
        -- no token was produced: go around the outer loop again
        };

    EXITS ScanComment => {
      -- Skip a bracketed comment, which may nest.  Both the two-character forms
      -- "<<" / ">>" and the single-character double angle brackets are recognized.
      state: {plain, leftBrocket, rightBrocket} Ź plain;
      nest: CARDINAL Ź 1;
      DO
        IF NextCharInline[] THEN GO TO EndFile;
        SELECT char FROM
          '> => SELECT state FROM
            plain, leftBrocket => state Ź rightBrocket;
            rightBrocket => {state Ź plain; IF (nest Ź nest - 1) = 0 THEN EXIT};
            ENDCASE;
          '< => SELECT state FROM
            plain, rightBrocket => state Ź leftBrocket;
            leftBrocket => {state Ź plain; nest Ź nest + 1};
            ENDCASE;
          leftDoubleAngleChar => {state Ź plain; nest Ź nest + 1};
          rightDoubleAngleChar => {state Ź plain; IF (nest Ź nest - 1) = 0 THEN EXIT};
          ENDCASE => state Ź plain;
        ENDLOOP;
      NextChar[];
      };
    };
    REPEAT
      -- GetNext: the current character belongs to the token, so step past it.
      GetNext => {IF (tIŹtI+1) = tMax THEN FillBuffer[]; char Ź tB[tI]};
      -- GotNext: char is already the first character after the token.
      GotNext => {};
      EndFile => {
        token.class Ź endMarker;
        token.index Ź tOrigin + (tI-1);
        token.value Ź MimP1.nullValue;
        UNTIL tEnded DO FillBuffer[] ENDLOOP; -- flush stream
        FillBuffer[];
        char Ź tB[tI];
        };
    ENDLOOP;
  IF token.class # endMarker THEN
    SourceMarks.StartSource[token.index, mark];
  nTokens Ź nTokens + 1;
  };

-- numerical conversion

LongLit: TYPE = ConstArith.Const; -- representation used while accumulating the value of a numeric literal

endMark: CHAR = '\000; -- CollectNumber stores this after the last character of a literal; the Scan* procs stop on it

CollectNumber: PROC
  [i: CARDINAL, float: BOOL] RETURNS [class: TSymbol, value: MimP1.Value] = {
  -- Collects the characters of a numeric literal into buffer, starting at buffer[i]
  -- with the current character, then converts it.
  --   i:     number of characters already placed in buffer by the caller
  --   float: TRUE when the caller has already seen a decimal point
  -- Returns tokenNUM, tokenCHAR (octal code followed by C) or tokenFLNUM, with the value.
  -- Raises BogusLiteral for a malformed literal and ConstArith.Overflow for one too large.
  -- Sets qDot when the number is followed directly by "..".
  hexCount: NAT Ź 0;  -- how many letters in a..h have been accepted
  hexSig: PACKED ARRAY CHAR['a..'h] OF {F, T} Ź ALL[F];  -- which of those letters occurred
  v: LongLit Ź const0;
  Accept: PROC = INLINE {
    buffer[i] Ź char;
    IF (i Ź i+1) >= iMax THEN ExpandBuffer[];
    NextChar[];
    };
  class Ź tokenNUM;
  DO
    SELECT char FROM
      IN ['0..'9] => Accept[];
      'e, 'E => {
        hexSig['e] Ź T;
        hexCount Ź hexCount + 1;
        Accept[];
        -- A sign directly after the first letter, when that letter is e, starts an exponent.
        -- The parentheses matter: AND binds tighter than OR, and without them
        -- a '- after any later e would also be swallowed.
        IF hexCount = 1 AND (char = '+ OR char = '-) THEN {float Ź TRUE; Accept[]};
        };
      IN ['a..'f] => {hexSig[char] Ź T; hexCount Ź hexCount+1; Accept[]};
      IN ['A..'F] => {
        hexSig[char+('a-'A)] Ź T;
        hexCount Ź hexCount+1;
        Accept[];
        };
      'h, 'H => {
        hexSig['h] Ź T;
        hexCount Ź hexCount+1;
        Accept[];
        };
      '. => {
        IF hexCount # 0 OR float THEN EXIT;
        NextChar[];
        -- ".." after a number is an interval; NextToken will see qDot and emit tokenDOTS.
        IF char = '. THEN {qDot Ź TRUE; EXIT};
        float Ź TRUE;
        buffer[i] Ź '.;
        IF (i Ź i+1) >= iMax THEN ExpandBuffer[];
        };
      ENDCASE => EXIT;
    ENDLOOP;
  buffer[i] Ź endMark;
  -- Choose the conversion from the letters that were seen.
  SELECT TRUE FROM
    float => GO TO floatExit;
    (hexSig['h] = T) => v Ź ScanHex[buffer];
    hexCount = 0 => v Ź ScanDecimal[buffer];
    hexCount # 1 => v Ź ScanHex[buffer];
    ENDCASE =>
      -- exactly one letter; the positions of hexSig are a, b, c, d, e, f, g, h
      SELECT hexSig FROM
        [F,T,F,F,F,F,F,F] => v Ź ScanOctal[buffer];
        [F,F,T,F,F,F,F,F] => class Ź tokenCHAR;
        [F,F,F,T,F,F,F,F] => v Ź ScanDecimal[buffer];
        [F,F,F,F,T,F,F,F] => GO TO floatExit;
        ENDCASE => v Ź ScanHex[buffer];
  SELECT class FROM
    tokenCHAR => {
      -- octal character code followed by C
      i: CARDINAL Ź 0;
      c: CHAR Ź buffer[i];
      val: CARDINAL Ź 0;
      over: BOOL Ź FALSE;
      IF c NOT IN ['0..'7] THEN GO TO bogus;
      WHILE (c Ź buffer[i]) IN ['0..'7] DO
        d: [0..7] = Digit[c];
        val Ź val*8 + d;
        i Ź i+1;
        IF val > Target.maxChar THEN over Ź TRUE;
        ENDLOOP;
      IF c = 'c OR c = 'C THEN c Ź buffer[iŹi+1] ELSE GO TO bogus;
      IF over THEN ERROR ConstArith.Overflow;
      IF c # endMark THEN GO TO bogus;
      value Ź [ref[LiteralOps.Find[either, SymbolOps.EncodeCard[val]]]];
      EXITS bogus => ERROR BogusLiteral;
      };
    ENDCASE => {
      lastLongInt: CARD = Target.maxLongWord / 2;
      IF ConstArith.Compare[v, constSplit] # less
        THEN {
          -- The value does not fit in one CARD: build an mwconst node from its two halves.
          hiC: CARD = ConstArith.ToCard[ConstArith.Div[v, constSplit]];
          hiV: Literals.LTIndex = LiteralOps.Find[unsigned, SymbolOps.EncodeCard[hiC]];
          hiT: Tree.Link = [literal[hiV]];
          loC: CARD = ConstArith.ToCard[ConstArith.Mod[v, constSplit]];
          loV: Literals.LTIndex = LiteralOps.Find[unsigned, SymbolOps.EncodeCard[loC]];
          loT: Tree.Link = [literal[loV]];
          resT: Tree.Link;
          type: Symbols.Type = IF hiC > lastLongInt
            THEN MimData.idDCARD
            ELSE MimData.idDINT;
          TreeOps.PushTree[hiT];
          TreeOps.PushTree[loT];
          TreeOps.SetInfo[LOOPHOLE[type]];
          resT Ź TreeOps.MakeNode[mwconst, 2];
          value Ź [ref[TreeOps.GetNode[resT]]];
          }
        ELSE {
          litClass: Literals.LitClass Ź either;
          card: CARD Ź ConstArith.ToCard[v];
          IF card > lastLongInt THEN litClass Ź unsigned;
          value Ź [ref[LiteralOps.Find[litClass, SymbolOps.EncodeCard[card]]]];
          };
      };
  EXITS floatExit => {
    -- Floating-point literals are kept as their text.
    class Ź tokenFLNUM;
    desc.length Ź i;
    value.r Ź LOOPHOLE[LiteralOps.FindString[desc]];
    };
  };

-- Lookup tables giving the numeric value of a digit character.
Digit: ARRAY CHAR ['0..'9] OF [0..9] = [0,1,2,3,4,5,6,7,8,9];

HexDigit: ARRAY CHAR ['A..'F] OF [0..15] = [10,11,12,13,14,15]; -- upper case only; ScanHex folds lower case first

-- the following 5 procs assume that the host CARD is good for at least 32 bits

-- ConstArith constants used by the numeric scanners: zero and the three radixes.
const0: LongLit = ConstArith.FromCard[0];

const8: LongLit = ConstArith.FromCard[8];

const10: LongLit = ConstArith.FromCard[10];

const16: LongLit = ConstArith.FromCard[16];

-- CARD.LAST + 1: CollectNumber splits any value at or above this into high and low parts.
constSplit: LongLit = ConstArith.Add[
ConstArith.FromCard[CARD.LAST], ConstArith.FromCard[1]];

AppendToScale: PROC [v: CARDINAL, digit: CHAR ['0..'9]] RETURNS [CARDINAL] = {
  -- Returns v*10 + the value of digit; used to accumulate the scale factor of a literal.
  -- Raises ConstArith.Overflow when the result would not fit in a CARDINAL.
  d: [0..9] = Digit[digit];
  -- Test before multiplying: comparing the wrapped result with v misses overflow,
  -- because v*10 can wrap around to a value that is still larger than v.
  IF v > (CARDINAL.LAST - d) / 10 THEN ERROR ConstArith.Overflow;
  RETURN [v*10 + d];
  };

ScanDecimal: PROC [s: REF TEXT] RETURNS [LongLit] = {
  -- Converts a decimal literal in s: digits, optionally followed by D and a decimal
  -- scale (the value is multiplied by 10 that many times), ended by endMark.
  -- Raises BogusLiteral when s does not have that form; ConstArith.Overflow may
  -- propagate from the arithmetic.
  v: LongLit Ź const0;
  {
  i: CARDINAL Ź 0;
  c: CHAR Ź s[i];
  IF c NOT IN ['0..'9] THEN GO TO bogus;
  DO
    d: [0..9] = Digit[c];
    v Ź ConstArith.Add[ConstArith.Mul[const10, v], ConstArith.FromCard[d]];
    i Ź i+1;
    c Ź s[i];
    IF c NOT IN ['0..'9] THEN EXIT;
    ENDLOOP;
  SELECT c FROM
    'd, 'D => {
      scale: CARDINAL Ź 0;
      DO
        i Ź i+1;
        c Ź s[i];
        IF c NOT IN ['0..'9] THEN EXIT;
        scale Ź AppendToScale[scale, c];
        ENDLOOP;
      THROUGH [1 .. scale] DO v Ź ConstArith.Mul[const10, v]; ENDLOOP;
      };
    ENDCASE;
  IF c # endMark THEN GO TO bogus;
  EXITS bogus => ERROR BogusLiteral;
  };
  RETURN [v];
  };

ScanOctal: PROC [s: REF TEXT] RETURNS [LongLit] = {
  -- Converts an octal literal in s: octal digits, optionally followed by B and a decimal
  -- scale (the value is multiplied by 8 that many times), ended by endMark.
  -- Raises BogusLiteral when s does not have that form; ConstArith.Overflow may
  -- propagate from the arithmetic.
  v: LongLit Ź const0;
  {
  i: CARDINAL Ź 0;
  c: CHAR Ź s[i];
  IF c NOT IN ['0..'7] THEN GO TO bogus;
  DO
    d: [0..7] = Digit[c];
    v Ź ConstArith.Add[ConstArith.Mul[const8, v], ConstArith.FromCard[d]];
    i Ź i+1;
    c Ź s[i];
    IF c NOT IN ['0..'7] THEN EXIT;
    ENDLOOP;
  SELECT c FROM
    'b, 'B => {
      scale: CARDINAL Ź 0;
      DO
        i Ź i+1;
        c Ź s[i];
        IF c NOT IN ['0..'9] THEN EXIT;  -- the scale itself is written in decimal
        scale Ź AppendToScale[scale, c];
        ENDLOOP;
      THROUGH [1 .. scale] DO v Ź ConstArith.Mul[const8, v]; ENDLOOP;
      };
    ENDCASE;
  IF c # endMark THEN GO TO bogus;
  EXITS bogus => ERROR BogusLiteral;
  };
  RETURN [v];
  };

ScanHex: PROC [s: REF TEXT] RETURNS [LongLit] = {
  -- Converts a hexadecimal literal in s: a decimal digit, then hex digits in either case,
  -- optionally followed by H and a decimal scale (the value is multiplied by 16 that many
  -- times), ended by endMark.
  -- Raises BogusLiteral when s does not have that form; ConstArith.Overflow may
  -- propagate from the arithmetic.
  v: LongLit Ź const0;
  {
  i: CARDINAL Ź 0;
  c: CHAR Ź s[i];
  IF c NOT IN ['0..'9] THEN GO TO bogus;  -- the first character must be a decimal digit
  DO
    d: [0..15] Ź 0;
    SELECT c FROM
      IN ['0..'9] => d Ź Digit[c];
      IN ['A..'F] => d Ź HexDigit[c];
      IN ['a..'f] => d Ź HexDigit[VAL[(c.ORD-'a.ORD)+'A.ORD]];
      ENDCASE => EXIT;
    v Ź ConstArith.Add[ConstArith.Mul[const16, v], ConstArith.FromCard[d]];
    i Ź i + 1;
    c Ź s[i];
    ENDLOOP;
  IF c = 'h OR c = 'H THEN {
    scale: CARDINAL Ź 0;
    WHILE (c Ź s[iŹi+1]) IN ['0..'9] DO scale Ź AppendToScale[scale, c]; ENDLOOP;
    THROUGH [1 .. scale] DO v Ź ConstArith.Mul[const16, v]; ENDLOOP;
    };
  IF c # endMark THEN GO TO bogus;
  EXITS bogus => ERROR BogusLiteral;
  };
  RETURN [v];
  };

-- character and string constants

escapeMark: CHAR = '\\;  -- introduces an escape sequence in character and string literals

Escape: PROC RETURNS [c: CHAR, valid, advance: BOOLŹTRUE] = {
  -- Decodes the character at the current position of a character or string literal.
  -- Without a leading escapeMark the current character is returned unchanged.
  --   c:       the decoded character
  --   valid:   FALSE when the escape sequence is not recognized or its octal code is too large
  --   advance: FALSE when the current character was not consumed as part of the escape,
  --            so the caller must not step past it
  c Ź char;
  IF c = escapeMark THEN {
    NextChar[];
    SELECT char FROM
      'n, 'N => c Ź Target.newLineChar;
      'r, 'R => c Ź Ascii.CR;
      'l, 'L => c Ź Ascii.LF;
      't, 'T => c Ź Ascii.TAB;
      'b, 'B => c Ź Ascii.BS;
      'f, 'F => c Ź Ascii.FF;
      '', '", escapeMark => c Ź char;
      IN ['0 .. '7] => {
        -- exactly three octal digits are expected
        nc, v: CARDINAL Ź 0;
        DO
          IF NOT (char IN ['0..'7]) THEN {valid Ź advance Ź FALSE; EXIT};
          v Ź 8*v + Digit[char];
          IF (nc Ź nc+1) = 3 THEN EXIT;
          NextChar[];
          ENDLOOP;
        IF v > Target.maxChar THEN {valid Ź FALSE; v Ź 0};
        c Ź v + 0c;
        };
      ENDCASE => valid Ź advance Ź FALSE
    };
  };

-- initialization/finalization

ScanInit: PUBLIC PROC [source: IO.STREAM, logger: PROC [PROC [log: IO.STREAM]]] = {
  -- Prepares the scanner to read tokens from source.
  --   source: the stream to scan; its position at the time of the call becomes streamOrigin
  --   logger: called by ScanError with a routine that writes the error text
  -- On return, char holds the first character of the source.
  stream Ź source;
  Logger Ź logger;
  SourceMarks.Reset[];

  -- token assembly area: start from a 256-character buffer
  IF buffer = NIL OR buffer.length # 256 THEN
    buffer Ź MimZones.permZone.NEW[TEXT[256]];
  desc.base Ź LOOPHOLE[buffer, LONG POINTER];
  desc.offset Ź 0;
  buffer.length Ź buffer.maxLength;
  iMax Ź buffer.length;

  -- input buffer: fresh and empty, positioned at the stream's current index
  streamOrigin Ź stream.GetIndex[];
  tB Ź MimZones.permZone.NEW[TextBuffer];
  tLimit Ź 0;
  tOrigin Ź 0;
  tMax Ź 0;
  qDot Ź FALSE;
  tEnded Ź FALSE;
  FillBuffer[];
  char Ź tB[tI];

  -- statistics
  nErrors Ź 0;
  nTokens Ź 0;
  };

ScanStats: PUBLIC PROC RETURNS [NAT, NAT] = {
  -- Reports the scanner's counters: tokens scanned first, lexical errors second.
  RETURN [nTokens, nErrors]};

ScanReset: PUBLIC PROC = {
  -- Drops the scanner's references to the stream and logger and frees both buffers.
  Logger Ź NIL;
  stream Ź NIL;
  IF tB # NIL THEN MimZones.permZone.FREE[@tB];
  MimZones.permZone.FREE[@buffer];
  };

-- error handling

ResetScanIndex: PUBLIC PROC [index: MimP1.Index] RETURNS [success: BOOL] = {
  -- Repositions the scanner so that char is the character at source position index.
  -- When index is outside the buffered range, the textChars-sized block that contains
  -- it is read again from the stream.  As written, the result is always TRUE.
  IF index NOT IN [tOrigin .. tLimit) THEN {
    block: CARDINAL = index/textChars;
    tLimit Ź block*textChars;
    tOrigin Ź tLimit;
    tMax Ź 0;
    tEnded Ź FALSE;
    stream.SetIndex[streamOrigin + tOrigin];
    FillBuffer[];
    };
  tI Ź index - tOrigin;
  IF tI >= tMax THEN FillBuffer[];
  char Ź tB[tI];
  RETURN [TRUE];
  };

-- The kinds of lexical error that ScanError can report.
ErrorCode: TYPE = {overflow, number, string, char, atom, escape};

ScanError: PROC [code: ErrorCode, tokenIndex: MimP1.Index] = {
  -- Counts a lexical error and reports it through Logger.
  --   code:       which kind of error occurred
  --   tokenIndex: source index at which to show the error
  Report: PROC [log: IO.STREAM] = {
    -- Writes the source line and message for this error to log, then a newline.
    text: Rope.ROPE = SELECT code FROM
      $overflow => "number too large",
      $number => "invalid number",
      $string => "string unterminated or too long",
      $char => "invalid character",
      $atom => "invalid atom",
      $escape => "invalid escape sequence",
      ENDCASE => NIL;
    ErrorContext[log, text, tokenIndex];
    log.PutChar['\n];
    };
  nErrors Ź nErrors + 1;
  Logger[Report];
  };

ErrorContext: PUBLIC PROC [to: IO.STREAM, message: Rope.ROPE, tokenIndex: MimP1.Index] = {
  -- Writes to the stream `to` the source line around tokenIndex with the error position
  -- marked, followed by the stream index and message.  The source stream is re-read
  -- for this, and its position is restored before returning.
  -- RRA: make sure that this routine essentially matches MimosaLogImpl.PrintTextLine
  savedPos: MimP1.Index = IO.GetIndex[stream];
  errorPos: MimP1.Index = streamOrigin + tokenIndex;
  lineStart: MimP1.Index Ź errorPos;
  probe: MimP1.Index Ź errorPos;
  marked: BOOL Ź FALSE;

  -- Back up, at most 100 characters, to the character after the previous line break.
  FOR n: [1..100] IN [1..100] UNTIL probe = 0 DO
    probe Ź probe - 1;
    IO.SetIndex[stream, probe];
    SELECT IO.GetChar[stream] FROM
      Ascii.CR, Ascii.LF, '\n => EXIT;
      ENDCASE => lineStart Ź probe;
    ENDLOOP;

  -- Copy at most 100 characters of that line, stopping at a line break or end of stream.
  IO.SetIndex[stream, lineStart];
  FOR n: [1..100] IN [1..100] DO
    ch: CHAR Ź IO.GetChar[stream ! IO.EndOfStream => EXIT];
    IF NOT marked AND lineStart+n > errorPos THEN {
      -- This marks the spot where the error is.
      IO.PutRope[to, " <<!!>> "];
      marked Ź TRUE;
      };
    SELECT ch FROM
      Ascii.CR, Ascii.LF, '\n => EXIT;
      ENDCASE => IO.PutChar[to, ch];
    ENDLOOP;

  IO.PutF[to, "\n[%d] %g\n", [integer[errorPos]], [rope[message]]];
  IO.SetIndex[stream, savedPos];
  };

-- Block source stuff

FindClass: PROC [string: STRING] RETURNS [CARDINAL] = {
  -- Looks string up in the reserved-word tables and returns its entry number.
  -- The hash is computed from the first and last characters, as in NextToken.
  -- Raises ERROR when string is not in the table.
  n: CARDINAL = string.length;
  head: NAT Ź string[0].ORD;
  tail: NAT Ź string[n-1].ORD;
  slot: HashIndex Ź ((head*128-head) + tail) MOD HashIndex.LAST + 1;
  sym: CARDINAL;
  -- Follow the hash chain, comparing each candidate's spelling with string.
  WHILE (sym Ź hashTab[slot].symbol) # 0 DO
    base: CARDINAL Ź vocabIndex[sym-1];
    IF vocabIndex[sym]-base = n THEN
      FOR k: CARDINAL IN [0 .. n) DO
        IF string[k] # vocab.text[base+k] THEN EXIT;
        REPEAT
          FINISHED => RETURN [sym];  -- every character matched
        ENDLOOP;
    IF (slot Ź hashTab[slot].link) = 0 THEN EXIT;
    ENDLOOP;
  ERROR;
  };

-- Table entries for the words END and BEGIN, found by InstallScanTable through FindClass.
-- NextToken compares reserved words against them to choose the $End and $Begin marks.
endClass: CARDINAL;

beginClass: CARDINAL;