DIRECTORY Ascii USING [BS, CR, FF, LF, TAB], ConstArith USING [Add, Compare, Const, Div, FromCard, Mod, Mul, Overflow, ToCard], ConvertUnsafe USING [SubString], IO USING [EndOfStream, GetChar, GetIndex, PutChar, PutF, PutRope, SetIndex, STREAM, UnsafeGetBlock], LiteralOps USING [Find, FindString], Literals USING [LitClass, LTIndex], MimData USING [idDCARD, idDINT], MimP1 USING [Index, Token, Value, nullValue], MimZones USING [permZone], ParseTable USING [endMarker, HashIndex, HashTableRef, IndexTableRef, InitHashTable, InitIndexTable, InitScanTable, InitVocabulary, ScanTableRef, tokenARROW, tokenATOM, tokenBIND, tokenCHAR, tokenDOT, tokenDOTS, tokenEQUAL, tokenFLNUM, tokenGE, tokenGREATER, tokenID, tokenLE, tokenLESS, tokenLSTR, tokenMINUS, tokenNE, tokenNUM, tokenPOWER, tokenSTR, tokenTILDE, TSymbol, VocabularyRef], RefText USING [Append], Rope USING [ROPE], SourceMarks USING [Reset, StartSource], SymbolOps USING [EncodeCard, EnterString], Symbols USING [nullName, Type], Target: TYPE MachineParms USING [maxChar, maxLongWord, newLineChar], Tree USING [Link], TreeOps USING [GetNode, MakeNode, PushTree, SetInfo]; MimScanner: PROGRAM IMPORTS ConstArith, IO, LiteralOps, MimData, MimZones, ParseTable, RefText, SourceMarks, SymbolOps, TreeOps EXPORTS MimP1 = { OPEN ParseTable; otherDollarChar: CHAR = '\244; -- € in the Xerox Character Code standard leftDoubleAngleChar: CHAR = '\253; -- « in the Xerox Character Code standard assignChar: CHAR = '\254; -- ¬ in the Xerox Character Code standard uparrowChar: CHAR = '\255; -- ­ in the Xerox Character Code standard multiplyChar: CHAR = '\264; -- ΄ in the Xerox Character Code standard divideChar: CHAR = '\270; -- Έ in the Xerox Character Code standard rightDoubleAngleChar: CHAR = '\273; -- » in the Xerox Character Code standard hashTab: HashTableRef ¬ NIL; scanTab: ScanTableRef ¬ NIL; vocab: VocabularyRef ¬ NIL; vocabIndex: IndexTableRef ¬ NIL; InstallScanTable: PUBLIC PROC = { IF vocab = NIL THEN { scanTab ¬ ParseTable.InitScanTable[]; hashTab ¬ ParseTable.InitHashTable[]; vocabIndex ¬ ParseTable.InitIndexTable[]; vocab ¬ ParseTable.InitVocabulary[]; endClass ¬ FindClass["END"]; beginClass ¬ FindClass["BEGIN"]; }; }; stream: IO.STREAM ¬ NIL; -- the input stream streamOrigin: MimP1.Index; -- FileStream.FileByteIndex Logger: PROC [PROC [log: IO.STREAM]] ¬ NIL; textChars: NAT = 4096; TextBuffer: TYPE = PACKED ARRAY [0..textChars) OF CHAR; tB: REF TextBuffer ¬ NIL; tI: [0..textChars] ¬ 0; tMax: [0..textChars] ¬ 0; tOrigin: MimP1.Index ¬ 0; tLimit: MimP1.Index ¬ 0; tEnded: BOOL ¬ FALSE; FillBuffer: PROC = { tOrigin ¬ tLimit; IF tEnded THEN tMax ¬ 0 ELSE { tMax ¬ stream.UnsafeGetBlock[[LOOPHOLE[tB], 0, textChars]].nBytesRead; IF tMax < textChars THEN tEnded ¬ TRUE; tLimit ¬ tOrigin + tMax; }; IF tMax = 0 THEN {tB[0] ¬ '\000; tMax ¬ 1}; tI ¬ 0; }; buffer: REF TEXT ¬ NIL; -- token assembly area iMax: CARDINAL ¬ 0; -- iMax = buffer.maxLength desc: ConvertUnsafe.SubString; -- initial buffer segment nTokens: NAT ¬ 0; -- token count nErrors: NAT ¬ 0; -- lexical errors BogusLiteral: ERROR = CODE; BufferOverflow: ERROR = CODE; maxBufferSize: NAT = 30000; ExpandBuffer: PROC = { oldBuffer: REF TEXT ¬ buffer; len: NAT ¬ oldBuffer.length; SELECT len FROM = maxBufferSize => ERROR BufferOverflow; > maxBufferSize/2 => len ¬ maxBufferSize; ENDCASE => len ¬ len + len; buffer ¬ NEW[TEXT[len]]; desc.base ¬ LOOPHOLE[buffer, LONG POINTER]; buffer ¬ RefText.Append[to: buffer, from: oldBuffer]; iMax ¬ buffer.length ¬ buffer.maxLength; MimZones.permZone.FREE[@oldBuffer]; }; char: CHAR ¬ 0C; -- current (most recently scanned) character qDot: BOOL ¬ FALSE; -- used to resolved decimal point vs. interval NextChar: PROC = { IF (tI¬tI+1) = tMax THEN FillBuffer[]; char ¬ tB[tI]; }; NextCharInline: PROC RETURNS [eof: BOOL] = INLINE { IF (tI¬tI+1) = tMax THEN {IF tEnded THEN RETURN [TRUE]; FillBuffer[]}; char ¬ tB[tI]; RETURN [FALSE]; }; NextToken: PUBLIC PROC RETURNS [token: MimP1.Token] = { mark: ATOM ¬ NIL; DO WHILE char IN ['\000..' ] DO SELECT char FROM '\000 => { IF NextCharInline[] THEN GO TO EndFile; IF char = '\000 THEN GO TO EndFile; }; ENDCASE => IF NextCharInline[] THEN GO TO EndFile; ENDLOOP; token.index ¬ tOrigin + tI; token.value ¬ MimP1.nullValue; { SELECT char FROM 'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm, 'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z => { i: CARDINAL ¬ 0; DO buffer[i] ¬ char; IF (tI¬tI+1) = tMax THEN FillBuffer[]; char ¬ tB[tI]; SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => IF (i ¬ i+1) >= iMax THEN ExpandBuffer[]; ENDCASE => EXIT; ENDLOOP; desc.length ¬ i+1; token.class ¬ tokenID; token.value.r ¬ SymbolOps.EnterString[desc]; GO TO GotNext; }; 'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M, 'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => { i: CARDINAL ¬ 0; first, last: NAT ¬ char.ORD; state: {uid, uidn, other} ¬ uid; DO buffer[i] ¬ char; IF (tI¬tI+1) = tMax THEN FillBuffer[]; char ¬ tB[tI]; SELECT char FROM IN ['A..'Z] => IF state = uidn THEN state ¬ other; IN ['0..'9] => IF state = uid THEN state ¬ uidn; IN ['a..'z] => state ¬ other; ENDCASE => EXIT; last ¬ char.ORD; IF (i ¬ i+1) >= iMax THEN ExpandBuffer[]; ENDLOOP; i ¬ i+1; IF state # other THEN { h: HashIndex ¬ ((first*128-first) + last) MOD HashIndex.LAST + 1; j: CARDINAL; WHILE (j ¬ hashTab[h].symbol) # 0 DO s2: CARDINAL = vocabIndex[j-1]; IF vocabIndex[j]-s2 = i THEN FOR s1: CARDINAL IN [0 .. i) DO IF buffer[s1] # vocab.text[s2+s1] THEN EXIT; REPEAT FINISHED => { token.class ¬ j; SELECT j FROM beginClass => mark ¬ $Begin; endClass => mark ¬ $End; ENDCASE; GO TO GotNext; }; ENDLOOP; IF (h ¬ hashTab[h].link) = 0 THEN EXIT; ENDLOOP; }; desc.length ¬ i; token.class ¬ tokenID; token.value.r ¬ SymbolOps.EnterString[desc]; GO TO GotNext; }; '0, '1, '2, '3, '4, '5, '6, '7, '8, '9 => { { ENABLE { BogusLiteral => GO TO numberSyntax; ConstArith.Overflow => GO TO numberOverflow; }; [token.class, token.value] ¬ CollectNumber[i: 0, float: FALSE]; EXITS numberSyntax => ScanError[$number, token.index]; numberOverflow => ScanError[$overflow, token.index]; }; GO TO GotNext; }; ': => { NextChar[]; IF char = '= THEN {token.class ¬ scanTab['_]; GO TO GetNext} ELSE {token.class ¬ scanTab[':]; GO TO GotNext}; }; '{ => { token.class ¬ scanTab[char]; mark ¬ $Begin; GO TO GetNext; }; '} => { token.class ¬ scanTab[char]; mark ¬ $End; GO TO GetNext; }; '; => { token.class ¬ scanTab[char]; mark ¬ $Semi; GO TO GetNext; }; '* => { token.class ¬ scanTab[char]; NextChar[]; IF char = '* THEN {token.class ¬ tokenPOWER; GO TO GetNext}; GO TO GotNext; }; ',, ';, '#, '+, '/, '@, '!, '(, '), '[, '], '_, '^, '| => { token.class ¬ scanTab[char]; GO TO GetNext; }; '' => { c: CHAR; valid, advance: BOOL; NextChar[]; [c, valid, advance] ¬ Escape[]; IF NOT valid THEN ScanError[$escape, token.index + 1]; token.class ¬ tokenCHAR; token.value.r ¬ LiteralOps.Find[either, SymbolOps.EncodeCard[c.ORD]]; IF advance THEN GO TO GetNext ELSE GO TO GotNext; }; '" => { i: CARDINAL ¬ 0; valid: BOOL; advance: BOOL ¬ TRUE; DO IF advance THEN IF NextCharInline[] THEN GO TO EOFEnd; SELECT char FROM '" => { IF (tI¬tI+1) = tMax THEN FillBuffer[]; char ¬ tB[tI]; IF char # '" THEN GO TO QuoteEnd; }; ENDCASE; IF i >= iMax THEN ExpandBuffer[ ! BufferOverflow => {ScanError[$string, token.index]; i ¬ 0; CONTINUE}]; [buffer[i], valid, advance] ¬ Escape[]; i ¬ i+1; IF NOT valid THEN ScanError[$escape, tOrigin + tI]; REPEAT QuoteEnd => NULL; EOFEnd => {ScanError[$string, token.index]; FillBuffer[]; char ¬ tB[tI]}; ENDLOOP; desc.length ¬ i; token.value.r ¬ LiteralOps.FindString[desc]; SELECT char FROM 'L, 'l => {token.class ¬ tokenLSTR; GO TO GetNext}; 'G, 'g => {token.class ¬ tokenSTR; GO TO GetNext}; ENDCASE => {token.class ¬ tokenSTR; GO TO GotNext}; }; '$, otherDollarChar => { i: CARDINAL ¬ 0; NextChar[]; token.class ¬ tokenATOM; token.value.r ¬ Symbols.nullName; SELECT char FROM IN ['a..'z], IN ['A..'Z] => NULL; ENDCASE => { ScanError[$atom, token.index]; GO TO GotNext; }; DO SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => { IF i >= iMax THEN ExpandBuffer[]; buffer[i] ¬ char; i ¬ i+1}; ENDCASE => EXIT; NextChar[]; ENDLOOP; desc.length ¬ i; token.value.r ¬ SymbolOps.EnterString[desc]; GO TO GotNext; }; '- => { NextChar[]; IF char # '- THEN {token.class ¬ tokenMINUS; GO TO GotNext}; char ¬ '\000; DO pChar: CHAR = char; IF NextCharInline[] THEN GO TO EndFile; SELECT char FROM '- => IF pChar = '- THEN EXIT; Ascii.CR, Ascii.LF, '\n => EXIT; ENDCASE; ENDLOOP; NextChar[]; }; '. => { IF qDot THEN { qDot ¬ FALSE; token.index ¬ token.index-1; token.class ¬ tokenDOTS; GO TO GetNext; }; NextChar[]; SELECT char FROM '. => {token.class ¬ tokenDOTS; GO TO GetNext}; IN ['0..'9] => { buffer[0] ¬ '.; { ENABLE { BogusLiteral => GO TO numberSyntax; ConstArith.Overflow => GO TO numberOverflow; }; [token.class, token.value] ¬ CollectNumber[i: 1, float: TRUE]; EXITS numberSyntax => ScanError[$number, token.index]; numberOverflow => ScanError[$overflow, token.index]; }; GO TO GotNext; }; ENDCASE => {token.class ¬ tokenDOT; GO TO GotNext}; }; '= => { NextChar[]; IF char = '> THEN {token.class ¬ tokenARROW; GO TO GetNext} ELSE {token.class ¬ tokenEQUAL; GO TO GotNext}; }; '< => { NextChar[]; SELECT char FROM '= => {token.class ¬ tokenLE; GO TO GetNext}; '< => GO TO ScanComment; ENDCASE => {token.class ¬ tokenLESS; GO TO GotNext}; }; '> => { NextChar[]; IF char = '= THEN {token.class ¬ tokenGE; GO TO GetNext} ELSE {token.class ¬ tokenGREATER; GO TO GotNext}; }; '~ => { NextChar[]; SELECT char FROM '= => {token.class ¬ tokenNE; GO TO GetNext}; '< => {token.class ¬ tokenGE; GO TO GetNext}; '> => {token.class ¬ tokenLE; GO TO GetNext}; '~ => {token.class ¬ tokenBIND; GO TO GetNext}; ENDCASE => {token.class ¬ tokenTILDE; GO TO GotNext}; }; leftDoubleAngleChar => GO TO ScanComment; assignChar => {token.class ¬ scanTab['_]; GO TO GetNext}; uparrowChar => {token.class ¬ scanTab['^]; GO TO GetNext}; multiplyChar => {token.class ¬ scanTab['*]; GO TO GetNext}; divideChar => {token.class ¬ scanTab['/]; GO TO GetNext}; ENDCASE => { token.class ¬ IF char < 200C THEN scanTab[char] ELSE 0; IF token.class # 0 THEN GO TO GetNext; NextChar[]; ScanError[$char, token.index]; }; EXITS ScanComment => { state: {plain, leftBrocket, rightBrocket} ¬ plain; nest: CARDINAL ¬ 1; DO IF NextCharInline[] THEN GO TO EndFile; SELECT char FROM '> => SELECT state FROM plain, leftBrocket => state ¬ rightBrocket; rightBrocket => {state ¬ plain; IF (nest ¬ nest - 1) = 0 THEN EXIT}; ENDCASE; '< => SELECT state FROM plain, rightBrocket => state ¬ leftBrocket; leftBrocket => {state ¬ plain; nest ¬ nest + 1}; ENDCASE; leftDoubleAngleChar => {state ¬ plain; nest ¬ nest + 1}; rightDoubleAngleChar => {state ¬ plain; IF (nest ¬ nest - 1) = 0 THEN EXIT}; ENDCASE => state ¬ plain; ENDLOOP; NextChar[]; }; }; REPEAT GetNext => {IF (tI¬tI+1) = tMax THEN FillBuffer[]; char ¬ tB[tI]}; GotNext => {}; EndFile => { token.class ¬ endMarker; token.index ¬ tOrigin + (tI-1); token.value ¬ MimP1.nullValue; UNTIL tEnded DO FillBuffer[] ENDLOOP; -- flush stream FillBuffer[]; char ¬ tB[tI]; }; ENDLOOP; IF token.class # endMarker THEN SourceMarks.StartSource[token.index, mark]; nTokens ¬ nTokens + 1; }; LongLit: TYPE = ConstArith.Const; endMark: CHAR = '\000; CollectNumber: PROC [i: CARDINAL, float: BOOL] RETURNS [class: TSymbol, value: MimP1.Value] = { hexCount: NAT ¬ 0; hexSig: PACKED ARRAY CHAR['a..'h] OF {F, T} ¬ ALL[F]; v: LongLit ¬ const0; Accept: PROC = INLINE { buffer[i] ¬ char; IF (i ¬ i+1) >= iMax THEN ExpandBuffer[]; NextChar[]; }; class ¬ tokenNUM; DO SELECT char FROM IN ['0..'9] => Accept[]; 'e, 'E => { hexSig['e] ¬ T; hexCount ¬ hexCount + 1; Accept[]; IF hexCount = 1 AND char = '+ OR char = '- THEN {float ¬ TRUE; Accept[]}; }; IN ['a..'f] => {hexSig[char] ¬ T; hexCount ¬ hexCount+1; Accept[]}; IN ['A..'F] => { hexSig[char+('a-'A)] ¬ T; hexCount ¬ hexCount+1; Accept[]; }; 'h, 'H => { hexSig['h] ¬ T; hexCount ¬ hexCount+1; Accept[]; }; '. => { IF hexCount # 0 OR float THEN EXIT; NextChar[]; IF char = '. THEN {qDot ¬ TRUE; EXIT}; float ¬ TRUE; buffer[i] ¬ '.; IF (i ¬ i+1) >= iMax THEN ExpandBuffer[]; }; ENDCASE => EXIT; ENDLOOP; buffer[i] ¬ endMark; SELECT TRUE FROM float => GO TO floatExit; (hexSig['h] = T) => v ¬ ScanHex[buffer]; hexCount = 0 => v ¬ ScanDecimal[buffer]; hexCount # 1 => v ¬ ScanHex[buffer]; ENDCASE => SELECT hexSig FROM [F,T,F,F,F,F,F,F] => v ¬ ScanOctal[buffer]; [F,F,T,F,F,F,F,F] => class ¬ tokenCHAR; [F,F,F,T,F,F,F,F] => v ¬ ScanDecimal[buffer]; [F,F,F,F,T,F,F,F] => GO TO floatExit; ENDCASE => v ¬ ScanHex[buffer]; SELECT class FROM tokenCHAR => { i: CARDINAL ¬ 0; c: CHAR ¬ buffer[i]; val: CARDINAL ¬ 0; over: BOOL ¬ FALSE; IF c NOT IN ['0..'7] THEN GO TO bogus; WHILE (c ¬ buffer[i]) IN ['0..'7] DO d: [0..7] = Digit[c]; val ¬ val*8 + d; i ¬ i+1; IF val > Target.maxChar THEN over ¬ TRUE; ENDLOOP; IF c = 'c OR c = 'C THEN c ¬ buffer[i¬i+1] ELSE GO TO bogus; IF over THEN ERROR ConstArith.Overflow; IF c # endMark THEN GO TO bogus; value ¬ [ref[LiteralOps.Find[either, SymbolOps.EncodeCard[val]]]]; EXITS bogus => ERROR BogusLiteral; }; ENDCASE => { lastLongInt: CARD = Target.maxLongWord / 2; IF ConstArith.Compare[v, constSplit] # less THEN { hiC: CARD = ConstArith.ToCard[ConstArith.Div[v, constSplit]]; hiV: Literals.LTIndex = LiteralOps.Find[unsigned, SymbolOps.EncodeCard[hiC]]; hiT: Tree.Link = [literal[hiV]]; loC: CARD = ConstArith.ToCard[ConstArith.Mod[v, constSplit]]; loV: Literals.LTIndex = LiteralOps.Find[unsigned, SymbolOps.EncodeCard[loC]]; loT: Tree.Link = [literal[loV]]; resT: Tree.Link; type: Symbols.Type = IF hiC > lastLongInt THEN MimData.idDCARD ELSE MimData.idDINT; TreeOps.PushTree[hiT]; TreeOps.PushTree[loT]; TreeOps.SetInfo[LOOPHOLE[type]]; resT ¬ TreeOps.MakeNode[mwconst, 2]; value ¬ [ref[TreeOps.GetNode[resT]]]; } ELSE { litClass: Literals.LitClass ¬ either; card: CARD ¬ ConstArith.ToCard[v]; IF card > lastLongInt THEN litClass ¬ unsigned; value ¬ [ref[LiteralOps.Find[litClass, SymbolOps.EncodeCard[card]]]]; }; }; EXITS floatExit => { class ¬ tokenFLNUM; desc.length ¬ i; value.r ¬ LOOPHOLE[LiteralOps.FindString[desc]]; }; }; Digit: ARRAY CHAR ['0..'9] OF [0..9] = [0,1,2,3,4,5,6,7,8,9]; HexDigit: ARRAY CHAR ['A..'F] OF [0..15] = [10,11,12,13,14,15]; const0: LongLit = ConstArith.FromCard[0]; const8: LongLit = ConstArith.FromCard[8]; const10: LongLit = ConstArith.FromCard[10]; const16: LongLit = ConstArith.FromCard[16]; constSplit: LongLit = ConstArith.Add[ ConstArith.FromCard[CARD.LAST], ConstArith.FromCard[1]]; AppendToScale: PROC [v: CARDINAL, digit: CHAR ['0..'9]] RETURNS [CARDINAL] = { d: [0..9] = Digit[digit]; next: CARDINAL = v*10 + d; IF next < v THEN ERROR ConstArith.Overflow; RETURN [next]; }; ScanDecimal: PROC [s: REF TEXT] RETURNS [LongLit] = { v: LongLit ¬ const0; { i: CARDINAL ¬ 0; c: CHAR ¬ s[i]; IF c NOT IN ['0..'9] THEN GO TO bogus; DO d: [0..9] = Digit[c]; v ¬ ConstArith.Add[ConstArith.Mul[const10, v], ConstArith.FromCard[d]]; i ¬ i+1; c ¬ s[i]; IF c NOT IN ['0..'9] THEN EXIT; ENDLOOP; SELECT c FROM 'd, 'D => { scale: CARDINAL ¬ 0; DO i ¬ i+1; c ¬ s[i]; IF c NOT IN ['0..'9] THEN EXIT; scale ¬ AppendToScale[scale, c]; ENDLOOP; THROUGH [1 .. scale] DO v ¬ ConstArith.Mul[const10, v]; ENDLOOP; }; ENDCASE; IF c # endMark THEN GO TO bogus; EXITS bogus => ERROR BogusLiteral; }; RETURN [v]; }; ScanOctal: PROC [s: REF TEXT] RETURNS [LongLit] = { v: LongLit ¬ const0; { i: CARDINAL ¬ 0; c: CHAR ¬ s[i]; IF c NOT IN ['0..'7] THEN GO TO bogus; DO d: [0..7] = Digit[c]; v ¬ ConstArith.Add[ConstArith.Mul[const8, v], ConstArith.FromCard[d]]; i ¬ i+1; c ¬ s[i]; IF c NOT IN ['0..'7] THEN EXIT; ENDLOOP; SELECT c FROM 'b, 'B => { scale: CARDINAL ¬ 0; DO i ¬ i+1; c ¬ s[i]; IF c NOT IN ['0..'9] THEN EXIT; scale ¬ AppendToScale[scale, c]; ENDLOOP; THROUGH [1 .. scale] DO v ¬ ConstArith.Mul[const8, v]; ENDLOOP }; ENDCASE; IF c # endMark THEN GO TO bogus; EXITS bogus => ERROR BogusLiteral; }; RETURN [v]; }; ScanHex: PROC [s: REF TEXT] RETURNS [LongLit] = { v: LongLit ¬ const0; { i: CARDINAL ¬ 0; c: CHAR ¬ s[i]; IF c NOT IN ['0..'9] THEN GO TO bogus; DO d: [0..15] ¬ 0; SELECT c FROM IN ['0..'9] => d ¬ Digit[c]; IN ['A..'F] => d ¬ HexDigit[c]; IN ['a..'f] => d ¬ HexDigit[VAL[(c.ORD-'a.ORD)+'A.ORD]]; ENDCASE => EXIT; v ¬ ConstArith.Add[ConstArith.Mul[const16, v], ConstArith.FromCard[d]]; i ¬ i + 1; c ¬ s[i]; ENDLOOP; IF c = 'h OR c = 'H THEN { scale: CARDINAL ¬ 0; WHILE (c ¬ s[i¬i+1]) IN ['0..'9] DO scale ¬ AppendToScale[scale, c]; ENDLOOP; THROUGH [1 .. scale] DO v ¬ ConstArith.Mul[const16, v]; ENDLOOP; }; IF c # endMark THEN GO TO bogus; EXITS bogus => ERROR BogusLiteral; }; RETURN [v]; }; escapeMark: CHAR = '\\; Escape: PROC RETURNS [c: CHAR, valid, advance: BOOL¬TRUE] = { c ¬ char; IF c = escapeMark THEN { NextChar[]; SELECT char FROM 'n, 'N => c ¬ Target.newLineChar; 'r, 'R => c ¬ Ascii.CR; 'l, 'L => c ¬ Ascii.LF; 't, 'T => c ¬ Ascii.TAB; 'b, 'B => c ¬ Ascii.BS; 'f, 'F => c ¬ Ascii.FF; '', '", escapeMark => c ¬ char; IN ['0 .. '7] => { nc, v: CARDINAL ¬ 0; DO IF NOT (char IN ['0..'7]) THEN {valid ¬ advance ¬ FALSE; EXIT}; v ¬ 8*v + Digit[char]; IF (nc ¬ nc+1) = 3 THEN EXIT; NextChar[]; ENDLOOP; IF v > Target.maxChar THEN {valid ¬ FALSE; v ¬ 0}; c ¬ v + 0c; }; ENDCASE => valid ¬ advance ¬ FALSE }; }; ScanInit: PUBLIC PROC [source: IO.STREAM, logger: PROC [PROC [log: IO.STREAM]]] = { stream ¬ source; Logger ¬ logger; SourceMarks.Reset[]; IF buffer = NIL OR buffer.length # 256 THEN buffer ¬ MimZones.permZone.NEW[TEXT[256]]; desc.base ¬ LOOPHOLE[buffer, LONG POINTER]; desc.offset ¬ 0; iMax ¬ buffer.length ¬ buffer.maxLength; streamOrigin ¬ stream.GetIndex[]; tB ¬ MimZones.permZone.NEW[TextBuffer]; tOrigin ¬ tLimit ¬ 0; tMax ¬ 0; tEnded ¬ qDot ¬ FALSE; FillBuffer[]; char ¬ tB[tI]; nTokens ¬ nErrors ¬ 0; }; ScanStats: PUBLIC PROC RETURNS [NAT, NAT] = { RETURN [nTokens, nErrors]; }; ScanReset: PUBLIC PROC = { MimZones.permZone.FREE[@buffer]; IF tB # NIL THEN MimZones.permZone.FREE[@tB]; stream ¬ NIL; Logger ¬ NIL; }; ResetScanIndex: PUBLIC PROC [index: MimP1.Index] RETURNS [success: BOOL] = { IF NOT (index IN [tOrigin .. tLimit)) THEN { page: CARDINAL = index/textChars; tOrigin ¬ tLimit ¬ page*textChars; tMax ¬ 0; tEnded ¬ FALSE; stream.SetIndex[streamOrigin + tOrigin]; FillBuffer[]; }; tI ¬ index - tOrigin; IF tI >= tMax THEN FillBuffer[]; char ¬ tB[tI]; RETURN [TRUE]; }; ErrorCode: TYPE = {overflow, number, string, char, atom, escape}; ScanError: PROC [code: ErrorCode, tokenIndex: MimP1.Index] = { Inner: PROC [log: IO.STREAM] = { ErrorContext[log, SELECT code FROM $overflow => "number too large", $number => "invalid number", $string => "string unterminated or too long", $char => "invalid character", $atom => "invalid atom", $escape => "invalid escape sequence", ENDCASE => NIL, tokenIndex]; log.PutChar['\n]; }; nErrors ¬ nErrors + 1; Logger[Inner]; }; ErrorContext: PUBLIC PROC [to: IO.STREAM, message: Rope.ROPE, tokenIndex: MimP1.Index] = { saveIndex: MimP1.Index = IO.GetIndex[stream]; origin: MimP1.Index = streamOrigin + tokenIndex; start: MimP1.Index ¬ origin; lineIndex: MimP1.Index ¬ origin; shown: BOOL ¬ FALSE; FOR n: [1..100] IN [1..100] UNTIL lineIndex = 0 DO lineIndex ¬ lineIndex - 1; IO.SetIndex[stream, lineIndex]; SELECT IO.GetChar[stream] FROM Ascii.CR, Ascii.LF, '\n => EXIT; ENDCASE => start ¬ lineIndex; ENDLOOP; IO.SetIndex[stream, start]; FOR n: [1..100] IN [1..100] DO char: CHAR ¬ IO.GetChar[stream ! IO.EndOfStream => EXIT]; IF start+n > origin THEN IF NOT shown THEN { IO.PutRope[to, " <> "]; shown ¬ TRUE; }; SELECT char FROM Ascii.CR, Ascii.LF, '\n => EXIT; ENDCASE => IO.PutChar[to, char]; ENDLOOP; IO.PutF[to, "\n[%d] %g\n", [integer[origin]], [rope[message]]]; IO.SetIndex[stream, saveIndex]; }; FindClass: PROC [string: STRING] RETURNS [CARDINAL] = { len: CARDINAL = string.length; firstChar: NAT ¬ string[0].ORD; lastChar: NAT ¬ string[len-1].ORD; h: HashIndex ¬ ((firstChar*128-firstChar) + lastChar) MOD HashIndex.LAST + 1; j: CARDINAL; WHILE (j ¬ hashTab[h].symbol) # 0 DO s2: CARDINAL ¬ vocabIndex[j-1]; IF vocabIndex[j]-s2 = len THEN FOR s1: CARDINAL IN [0 .. len) DO IF string[s1] # vocab.text[s2+s1] THEN EXIT; REPEAT FINISHED => RETURN [j]; ENDLOOP; IF (h ¬ hashTab[h].link) = 0 THEN EXIT; ENDLOOP; ERROR; }; endClass: CARDINAL; beginClass: CARDINAL; }. Π MimScanner.mesa Copyright Σ 1985, 1986, 1987, 1988, 1989, 1991 by Xerox Corporation. All rights reserved. Satterthwaite, June 17, 1986 2:26:36 pm PDT Russ Atkinson (RRA) January 4, 1991 1:45 am PST funny constants table installation scanner state also expanded inline within Atom \000\000 is Tioga format for end-of-text The start of a block or enumerated type The end of a block or enumerated type numerical conversion the following 5 procs assume that the host CARD is good for at least 32 bits character and string constants initialization/finalization error handling RRA: make sure that this routine essentially matches MimosaLogImpl.PrintTextLine This marks the spot where the error is. Block source stuff Κ{•NewlineDelimiter –(cedarcode) style™headšœ™Icodešœ ΟeœO™ZLšΟy+™+L™/—˜šΟk ˜ Lš œŸœŸœŸœŸœŸœŸœ˜"Lšœ ŸœB˜RLšœŸœ ˜ LšŸœŸœDŸœ˜dLšœ Ÿœ˜$Lšœ Ÿœ˜#LšœŸœ˜ LšœŸœ"˜-Lšœ Ÿœ ˜Lšœ Ÿœσ˜ƒLšœŸœ ˜LšœŸœŸœ˜Lšœ Ÿœ˜'Lšœ Ÿœ˜*LšœŸœ˜LšœŸœŸœ%˜DLšœŸœ˜LšœŸœ(˜5L˜——šΟn œŸ˜LšŸœ ŸœV˜lLšŸœ ˜LšŸœ ˜L˜—™L˜LšœŸœ Οc)˜HLšœŸœ ‘)˜LLšœ Ÿœ ‘)˜CLšœ Ÿœ ‘)˜DLšœŸœ ‘)˜ELšœ Ÿœ ‘)˜CLšœŸœ ‘)˜ML˜—šœ™L˜LšœŸœ˜LšœŸœ˜LšœŸœ˜LšœŸœ˜ L˜š œŸœŸœ˜!šŸœ ŸœŸœ˜Lšœ%˜%Lšœ%˜%Lšœ)˜)Lšœ$˜$Lšœ˜Lšœ ˜ L˜—L˜L˜——Lšœ ™ ˜LšœŸœŸœŸœ‘˜,Lšœ‘˜6L˜Lš  œŸœŸœŸœŸœŸœ˜+L˜Lšœ Ÿœ˜Lš œ ŸœŸœŸœŸœŸœ˜7L˜LšœŸœŸœ˜L˜L˜Lšœ˜Lšœ˜LšœŸœŸœ˜L˜š  œŸœ˜L˜šŸœ˜ LšŸœ ˜ šŸœ˜LšœŸœ ˜FLšŸœŸœ Ÿœ˜'L˜L˜——LšŸœ Ÿœ˜+L˜L˜L˜—LšœŸœŸœŸœ‘˜/LšœŸœ‘˜/Lšœ‘˜8L˜Lšœ Ÿœ ‘˜#Lšœ Ÿœ ‘˜&L˜Lš  œŸœŸœ˜Lš œŸœŸœ˜LšœŸœ ˜L˜š  œŸœ˜Lšœ ŸœŸœ ˜LšœŸœ˜šŸœŸ˜LšœŸœ˜(Lšœ)˜)LšŸœ˜—Lšœ ŸœŸœ˜Lšœ Ÿœ ŸœŸœ˜+Lšœ5˜5L˜(LšœŸœ ˜#L˜L˜—LšœŸœ‘,˜=LšœŸœŸœ‘.˜BL˜š œŸœ˜Lšœ ™ LšŸœŸœ˜&Lšœ˜Lšœ˜L˜—š  œŸœŸœŸœŸœ˜3Lš ŸœŸœŸœŸœŸœŸœ˜FLšœ˜LšŸœŸœ˜Lšœ˜L˜—š  œŸœŸœŸœ˜7LšœŸœŸœ˜šŸ˜šŸœŸœ Ÿ˜šŸœŸ˜šœ ˜ Lšœ(™(LšŸœŸœŸœŸœ ˜'LšŸœŸœŸœŸœ ˜#Lšœ˜—šŸœ˜ LšŸœŸœŸœŸœ ˜'——LšŸœ˜—L˜Lšœ˜Lšœ˜L˜˜šŸœŸ˜L˜˜lLšœŸœ˜šŸ˜L˜LšŸœŸœ˜&L˜šŸœŸ˜šŸœ Ÿœ Ÿœ ˜(LšŸœŸœ˜)—LšŸœŸœ˜—LšŸœ˜—L˜Lšœ˜Lšœ,˜,LšŸœŸœ ˜Lšœ˜L˜—˜lLšœŸœ˜Lšœ ŸœŸœ˜L˜ šŸ˜L˜LšŸœŸœ˜&L˜šŸœŸ˜LšŸœ ŸœŸœ˜2LšŸœ Ÿœ Ÿœ˜0LšŸœ˜LšŸœŸœ˜—Lšœ Ÿœ˜LšŸœŸœ˜)LšŸœ˜—L˜šŸœŸœ˜Lšœ*Ÿœ Ÿœ˜ALšœŸœ˜ šŸœŸ˜$LšœŸœ˜šŸœŸ˜šŸœŸœŸœ Ÿ˜LšŸœ ŸœŸœ˜,šŸ˜šŸœ˜ Lšœ˜šŸœŸ˜ Lšœ˜Lšœ˜LšŸœ˜—LšŸœŸœ ˜Lšœ˜——LšŸœ˜——LšŸœŸœŸœ˜'LšŸœ˜—Lšœ˜—L˜Lšœ˜Lšœ,˜,LšŸœŸœ ˜Lšœ˜L˜—˜+˜šŸœ˜LšœŸœŸœ˜#LšœŸœŸœ˜,L˜—Lšœ8Ÿœ˜?šŸ˜Lšœ1˜1Lšœ5˜5—L˜—LšŸœŸœ ˜Lšœ˜L˜—˜L˜ šŸœ ˜ LšŸœŸœŸœ ˜/LšŸœŸœŸœ ˜0—L˜L˜—˜L™'Lšœ˜Lšœ˜LšŸœŸœ ˜L˜L˜—˜L™%Lšœ˜Lšœ ˜ LšŸœŸœ ˜L˜L˜—˜Lšœ˜L˜ LšŸœŸœ ˜Lšœ˜L˜—˜Lšœ˜L˜ LšŸœ ŸœŸœŸœ ˜šŸ˜Lšœ1˜1Lšœ5˜5—L˜—LšŸœŸœ ˜Lšœ˜—LšŸœŸœŸœ ˜3—šœ˜L˜——˜L˜ šŸœ ˜ LšŸœŸœŸœ ˜.LšŸœŸœŸœ ˜/—Lšœ˜L˜—˜L˜ šŸœŸ˜LšœŸœŸœ ˜-LšœŸœŸœ ˜LšŸœŸœŸœ ˜4—Lšœ˜L˜—˜L˜ šŸœ ˜ LšŸœŸœŸœ ˜+LšŸœŸœŸœ ˜1—Lšœ˜L˜—˜L˜ šŸœŸ˜LšœŸœŸœ ˜-LšœŸœŸœ ˜-LšœŸœŸœ ˜-Lšœ ŸœŸœ ˜/LšŸœŸœŸœ ˜5—šœ˜L˜——šœŸœŸœ ˜)L˜—šœ*ŸœŸœ ˜9L˜—šœ+ŸœŸœ ˜:L˜—šœ,ŸœŸœ ˜;L˜—šœ*ŸœŸœ ˜9L˜—šŸœ˜ LšœŸœ ŸœŸœ˜7LšŸœŸœŸœŸœ ˜&L˜ Lšœ˜L˜L˜——šŸœ˜Lšœ2˜2LšœŸœ˜šŸ˜LšŸœŸœŸœŸœ ˜'šŸœŸ˜šœŸœŸ˜Lšœ+˜+Lšœ ŸœŸœŸœ˜DLšŸœ˜—šœŸœŸ˜Lšœ+˜+Lšœ0˜0LšŸœ˜—Lšœ8˜8Lšœ(ŸœŸœŸœ˜LLšŸœ˜—LšŸœ˜—Lšœ ˜ Lšœ˜—L˜L˜—šŸ˜Lšœ ŸœŸœ˜BLšœ˜˜ Lšœ˜Lšœ˜Lšœ˜LšŸœŸœŸœ‘˜5L˜ L˜L˜——LšŸœ˜—šŸœŸ˜Lšœ+˜+—L˜Lšœ˜L˜——Lšœ™˜Lšœ Ÿœ˜!Lšœ Ÿœ ˜L˜š   œŸœŸœ ŸœŸœ)˜_Lšœ Ÿœ˜Lš œŸœŸœŸœ Ÿœ Ÿœ˜5L˜š œŸœŸœ˜L˜LšŸœŸœ˜)L˜ L˜—L˜šŸ˜šŸœŸ˜LšŸœ˜˜ Lšœ˜Lšœ˜Lšœ ˜ Lš ŸœŸœ Ÿœ Ÿœ Ÿœ ˜ILšœ˜—LšŸœA˜CšŸœ˜Lšœ˜Lšœ˜Lšœ ˜ Lšœ˜—šœ ˜ Lšœ˜Lšœ˜Lšœ ˜ Lšœ˜—˜LšŸœŸœŸœŸœ˜#L˜ LšŸœ Ÿœ ŸœŸœ˜&LšœŸœ˜ L˜LšŸœŸœ˜)Lšœ˜—LšŸœŸœ˜—LšŸœ˜—L˜šŸœŸœŸ˜Lšœ ŸœŸœ ˜Lšœ(˜(Lšœ(˜(Lšœ$˜$šŸœ˜ šŸœŸ˜Lšœ+˜+Lšœ'˜'Lšœ-˜-LšœŸœŸœ ˜%LšŸœ˜———šŸœŸ˜šœ˜LšœŸœ˜LšœŸœ ˜LšœŸœ˜LšœŸœŸœ˜Lš ŸœŸœŸœ ŸœŸœŸœ˜&šŸœŸœ Ÿ˜$Lšœ˜Lšœ˜L˜LšŸœŸœŸœ˜)LšŸœ˜—Lš ŸœŸœŸœŸœŸœŸœ˜L˜—LšŸœ˜—LšŸœ ŸœŸœŸœ˜ LšŸœ Ÿœ˜"L˜—LšŸœ˜ Lšœ˜L˜—š  œŸœŸœŸœŸœ˜1Lšœ˜˜LšœŸœ˜LšœŸœ˜Lš ŸœŸœŸœ ŸœŸœŸœ˜&šŸ˜Lšœ˜šŸœŸ˜ LšŸœ˜LšŸœ˜Lš ŸœŸœŸœŸœŸœ˜8LšŸœŸœ˜—LšœG˜GL˜ Lšœ ˜ LšŸœ˜—šŸœŸœŸœ˜LšœŸœ˜LšŸœŸœ Ÿœ"Ÿœ˜MLšŸœŸœ!Ÿœ˜@L˜—LšŸœ ŸœŸœŸœ˜ LšŸœ Ÿœ˜"L˜—LšŸœ˜ Lšœ˜L˜——Lšœ™˜Lšœ Ÿœ˜L˜š  œŸœŸœŸœŸ œ˜=L˜ šŸœŸœ˜L˜ šŸœŸ˜Lšœ!˜!LšœŸœ˜LšœŸœ˜LšœŸœ˜LšœŸœ˜LšœŸœ˜L˜šŸœ˜LšœŸœ˜šŸ˜Lš ŸœŸœŸœ ŸœŸœŸœ˜?L˜LšŸœŸœŸœ˜L˜ LšŸœ˜—LšŸœŸœ Ÿœ ˜2L˜ L˜—LšŸœŸ˜"—Lšœ˜—Lšœ˜L˜——Lšœ™˜š œŸœŸœ ŸœŸœ ŸœŸœŸœŸœ˜SL˜L˜Lšœ˜Lš Ÿœ ŸœŸœŸœŸœŸœ˜VLšœ Ÿœ ŸœŸœ˜=L˜(Lšœ!˜!LšœŸœ ˜'Lšœ˜Lšœ ˜ LšœŸœ˜Lšœ ˜ Lšœ˜L˜L˜L˜—š   œŸœŸœŸœŸœŸœ˜-LšŸœ˜Lšœ˜L˜—š  œŸœŸœ˜LšœŸœ ˜ LšŸœŸœŸœŸœ˜-Lšœ Ÿœ˜ Lšœ Ÿœ˜ Lšœ˜L˜——Lšœ™˜š  œŸœŸœŸœ Ÿœ˜LšŸœŸœŸœŸœ˜,LšœŸœ˜!Lšœ"˜"Lšœ ˜ Lšœ Ÿœ˜Lšœ(˜(L˜ L˜—L˜LšŸœ Ÿœ˜ Lšœ˜LšŸœŸœ˜Lšœ˜L˜—Lšœ Ÿœ2˜AL˜š  œŸœ/˜>š œŸœŸœŸœ˜ ˜šŸœŸ˜L˜ L˜L˜-L˜L˜L˜%LšŸœŸœ˜—L˜ —Lšœ˜Lšœ˜—L˜L˜L˜L˜—š   œŸœŸœŸœŸœŸœ˜ZLšœP™PLšœŸœ˜-Lšœ0˜0Lšœ˜Lšœ ˜ LšœŸœŸœ˜šŸœ Ÿœ ŸœŸ˜2L˜LšŸœ˜šŸœŸœŸ˜LšœŸœŸœ Ÿœ˜ LšŸœ˜—LšŸœ˜—LšŸœ˜šŸœ Ÿœ Ÿ˜Lš œŸœŸœŸœŸœ˜9šŸœŸ˜šŸœŸœŸœ˜L™'LšŸœ˜LšœŸœ˜ Lšœ˜——šŸœŸ˜LšœŸœŸœ Ÿœ˜ LšŸœŸœ˜ —LšŸœ˜—LšŸœ=˜?LšŸœ˜Lšœ˜L˜——™L™š   œŸœ ŸœŸœŸœ˜7LšœŸœ˜Lšœ Ÿœ Ÿœ˜Lšœ ŸœŸœ˜"Lšœ6Ÿœ Ÿœ˜MLšœŸœ˜ šŸœŸ˜$LšœŸœ˜šŸœŸ˜šŸœŸœŸœ Ÿ˜!LšŸœ ŸœŸœ˜,šŸ˜LšŸœŸœ˜—LšŸœ˜——LšŸœŸœŸœ˜'LšŸœ˜—LšŸœ˜L˜—Lšœ Ÿœ˜šœ Ÿœ˜L˜——˜L˜——…—O†qΡ