DIRECTORY Ascii USING [BS, ControlZ, CR, FF, LF, NUL, TAB], ConvertUnsafe USING [SubString], IO USING [card, EndOfStream, GetChar, GetIndex, int, Put1, PutChar, PutF, rope, SetIndex, STREAM, UnsafeGetBlock], MobHashOps USING [EnterString], MobP1 USING [Token, Value, nullValue], MobParseTable USING [endMarker, HashIndex, HashTable, HashTableRef, IndexTable, IndexTableRef, InitHashTable, InitIndexTable, InitScanTable, InitVocabulary, ScanTableRef, tokenID, tokenSTR, TSymbol, VocabularyRef], RefText USING [Append], Rope USING [ROPE], VM USING [bytesPerPage, logBytesPerPage, PageCount, PagesForBytes]; MobScanner: PROGRAM IMPORTS MobHashOps, IO, RefText, MobParseTable, VM EXPORTS MobP1 = { OPEN MobParseTable; hashTab: HashTableRef; scanTab: ScanTableRef; vocab: VocabularyRef; vocabIndex: IndexTableRef; InstallScanTable: PUBLIC PROC[] = { hashTab ¬ MobParseTable.InitHashTable[]; scanTab ¬ MobParseTable.InitScanTable[]; vocab ¬ MobParseTable.InitVocabulary[]; vocabIndex ¬ MobParseTable.InitIndexTable[]}; TypeSym: PUBLIC PROC[log: IO.STREAM, sym: TSymbol] = { log.PutChar[' ]; IF sym IN [1..endMarker) THEN FOR i: NAT IN [vocabIndex[sym-1]..vocabIndex[sym]) DO log.PutChar[LOOPHOLE[vocab, LONG STRING][i]] ENDLOOP ELSE log.Put1[IO.int[sym]]}; stream: IO.STREAM ¬ NIL; -- the input stream streamOrigin: StreamIndex; Logger: PROC[PROC [log: IO.STREAM]] ¬ NIL; textChars: NAT ~ 4096; TextBuffer: TYPE ~ PACKED ARRAY [0..textChars) OF CHAR; tB: REF TextBuffer; tI, tMax: [0..textChars]; tOrigin, tLimit: CARDINAL ¬ 0; tEnded: BOOL ¬ FALSE; FillBuffer: PROC ~ { tOrigin ¬ tLimit; IF tEnded THEN tMax ¬ 0 ELSE { tMax ¬ stream.UnsafeGetBlock[[LOOPHOLE[tB], 0, textChars]].nBytesRead; IF tMax < textChars THEN tEnded ¬ TRUE; tLimit ¬ tOrigin + tMax}; IF tMax = 0 THEN {tB[0] ¬ Ascii.NUL; tMax ¬ 1}; tI ¬ 0}; buffer: REF TEXT ¬ NIL; -- token assembly area iMax: CARDINAL; -- iMax = buffer.maxLength desc: ConvertUnsafe.SubString; -- initial buffer segment nTokens: NAT ¬ 0; -- token count nErrors: NAT; -- lexical errors BufferOverflow: ERROR ~ CODE; ExpandBuffer: PROC ~ { oldBuffer: REF TEXT ¬ buffer; IF oldBuffer.length > 2000 THEN ERROR BufferOverflow; buffer ¬ NEW[TEXT[2*oldBuffer.length]]; desc.base ¬ LOOPHOLE[buffer, LONG STRING]; buffer ¬ RefText.Append[buffer, oldBuffer]; iMax ¬ buffer.length ¬ buffer.maxLength; oldBuffer ¬ NIL}; char: CHAR; -- current (most recently scanned) character qDot: BOOL; -- used to resolved decimal point vs. interval NextChar: PROC ~ { -- also expanded inline within Atom IF (tI¬tI+1) = tMax THEN FillBuffer[]; char ¬ tB[tI]}; NextToken: PUBLIC PROC RETURNS[token: MobP1.Token] ~ { --OPEN token; DO WHILE char IN [Ascii.NUL..' ] DO SELECT char FROM Ascii.NUL => { -- ­@­@ is Tioga escape seq IF (tI¬tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]}; char ¬ tB[tI]; IF char = Ascii.NUL THEN GO TO EndFile}; Ascii.ControlZ => -- ­Z is Bravo escape char UNTIL char = Ascii.CR OR char = Ascii.LF DO IF (tI¬tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]}; char ¬ tB[tI]; ENDLOOP; ENDCASE => { IF (tI¬tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]}; char ¬ tB[tI]}; ENDLOOP; token.index ¬ tOrigin + tI; token.value ¬ MobP1.nullValue; SELECT char FROM 'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm, 'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z => { i: CARDINAL ¬ 0; DO buffer[i] ¬ char; IF (tI¬tI+1) = tMax THEN FillBuffer[]; char ¬ tB[tI]; SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => IF (i ¬ i+1) >= iMax THEN ExpandBuffer[]; ENDCASE => EXIT; ENDLOOP; desc.length ¬ i+1; token.class ¬ tokenID; token.value.r ¬ LOOPHOLE[MobHashOps.EnterString[desc]]; GO TO GotNext}; 'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M, 'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => { i: CARDINAL ¬ 0; uId: BOOL ¬ TRUE; first, last: NAT ¬ char.ORD; DO buffer[i] ¬ char; IF (tI¬tI+1) = tMax THEN FillBuffer[]; char ¬ tB[tI]; SELECT char FROM IN ['A..'Z] => { last ¬ char.ORD; IF (i ¬ i+1) >= iMax THEN ExpandBuffer[]}; IN ['a..'z], IN ['0..'9] => { uId ¬ FALSE; IF (i ¬ i+1) >= iMax THEN ExpandBuffer[]}; ENDCASE => EXIT; ENDLOOP; i ¬ i+1; IF uId THEN { h: HashIndex ¬ ((first*128-first) + last) MOD HashIndex.LAST + 1; j, s1, s2: CARDINAL; WHILE (j ¬ hashTab[h].symbol) # 0 DO IF vocabIndex[j]-(s2¬vocabIndex[j-1]) = i THEN FOR s1 IN [0 .. i) DO IF buffer[s1] # vocab.text[s2] THEN EXIT; s2 ¬ s2+1; REPEAT FINISHED => {token.class ¬ j; GO TO GotNext}; ENDLOOP; IF (h ¬ hashTab[h].link) = 0 THEN EXIT; ENDLOOP}; desc.length ¬ i; token.class ¬ tokenID; token.value.r ¬ LOOPHOLE[MobHashOps.EnterString[desc]]; GO TO GotNext}; ',, ';, ':, '_, '#, '~, '+, '*, '/, '^, '@, '!, '=, '., '(, '), '[, '], '{, '} => { token.class ¬ scanTab[char]; GO TO GetNext}; '" => { i: CARDINAL ¬ 0; valid: BOOL; advance: BOOL ¬ TRUE; DO IF advance THEN { IF (tI¬tI+1) = tMax THEN {IF tEnded THEN GO TO EOFEnd; FillBuffer[]}; char ¬ tB[tI]}; SELECT char FROM '" => { IF (tI¬tI+1) = tMax THEN FillBuffer[]; char ¬ tB[tI]; IF char # '" THEN GO TO QuoteEnd}; ENDCASE; IF i >= iMax THEN ExpandBuffer[ ! BufferOverflow => {ScanError[string, token.index]; i ¬ 0; CONTINUE}]; [buffer[i], valid, advance] ¬ Escape[]; i ¬ i+1; IF ~valid THEN ScanError[$escape, tOrigin + tI]; REPEAT QuoteEnd => NULL; EOFEnd => {ScanError[$string, token.index]; FillBuffer[]; char ¬ tB[tI]}; ENDLOOP; desc.length ¬ i; token.value.r ¬ LOOPHOLE[MobHashOps.EnterString[desc]]; token.class ¬ tokenSTR; GO TO GotNext}; '- => { NextChar[]; IF char # '- THEN { token.class ¬ scanTab['-]; IF token.class = 0 THEN ScanError[char, token.index-1]; GO TO GotNext}; char ¬ Ascii.NUL; DO pChar: CHAR ~ char; IF (tI¬tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]}; char ¬ tB[tI]; SELECT char FROM '- => IF pChar = '- THEN EXIT; Ascii.CR, Ascii.LF => EXIT; ENDCASE; ENDLOOP; NextChar[]}; '< => { NextChar[]; SELECT char FROM '< => { state: {plain, leftBrocket, rightBrocket} ¬ $plain; nest: CARDINAL ¬ 1; DO IF (tI¬tI+1) = tMax THEN { IF tEnded THEN GO TO EndFile; FillBuffer[]}; char ¬ tB[tI]; SELECT char FROM '> => SELECT state FROM $plain, $leftBrocket => state ¬ $rightBrocket; $rightBrocket => { state ¬ $plain; nest ¬ nest - 1; IF nest = 0 THEN EXIT}; ENDCASE; '< => SELECT state FROM $plain, $rightBrocket => state ¬ $leftBrocket; $leftBrocket => {state ¬ $plain; nest ¬ nest + 1}; ENDCASE; ENDCASE => state ¬ $plain; ENDLOOP; NextChar[]}; ENDCASE => ScanError[$char, token.index]}; ENDCASE => { token.class ¬ scanTab[char]; IF token.class # 0 THEN GO TO GetNext; NextChar[]; ScanError[$char, token.index]}; REPEAT GetNext => {IF (tI¬tI+1) = tMax THEN FillBuffer[]; char ¬ tB[tI]}; GotNext => NULL; EndFile => { token.class ¬ endMarker; token.index ¬ tOrigin + (tI-1); token.value ¬ MobP1.nullValue; UNTIL tEnded DO FillBuffer[] ENDLOOP; -- flush stream FillBuffer[]; char ¬ tB[tI]}; ENDLOOP; nTokens ¬ nTokens + 1; RETURN}; Digit: ARRAY CHAR ['0..'9] OF [0..9] ~ [0,1,2,3,4,5,6,7,8,9]; escapeMark: CHAR ~ '\\; Escape: PROC RETURNS[c: CHAR, valid, advance: BOOL¬TRUE] ~ { c ¬ char; IF c = escapeMark THEN { NextChar[]; SELECT char FROM 'n, 'N => c ¬ Ascii.CR; 'r, 'R => c ¬ Ascii.CR; 'l, 'L => c ¬ Ascii.LF; 't, 'T => c ¬ Ascii.TAB; 'b, 'B => c ¬ Ascii.BS; 'f, 'F => c ¬ Ascii.FF; '', '", escapeMark => c ¬ char; IN ['0 .. '7] => { nc, v: CARDINAL ¬ 0; DO IF ~(char IN ['0..'7]) THEN {valid ¬ advance ¬ FALSE; EXIT}; v ¬ 8*v + Digit[char]; IF (nc ¬ nc+1) = 3 THEN EXIT; NextChar[]; ENDLOOP; IF v > 377b THEN {valid ¬ FALSE; v ¬ 0}; c ¬ VAL[v]}; ENDCASE => valid ¬ advance ¬ FALSE}; RETURN}; ScanInit: PUBLIC PROC[ source: IO.STREAM, logger: PROC [PROC [log: IO.STREAM]]] ~ { stream ¬ source; Logger ¬ logger; IF buffer = NIL THEN buffer ¬ NEW[TEXT[256]]; desc.base ¬ LOOPHOLE[buffer, LONG STRING]; desc.offset ¬ 0; iMax ¬ buffer.length ¬ buffer.maxLength; streamOrigin ¬ IO.GetIndex[stream]; tB ¬ NEW[TextBuffer]; tOrigin ¬ tLimit ¬ 0; tMax ¬ 0; tEnded ¬ FALSE; FillBuffer[]; char ¬ tB[tI]; qDot ¬ FALSE; nTokens ¬ nErrors ¬ 0}; ScanStats: PUBLIC PROC RETURNS[NAT, NAT] ~ { RETURN[nTokens, nErrors]}; ScanReset: PUBLIC PROC ~ { IF buffer # NIL THEN FREE[@buffer]; IF tB # NIL THEN FREE[@tB]; desc.base ¬ NIL; stream ¬ NIL; Logger ¬ NIL}; StreamIndex: TYPE ~ INT; -- FileStream.FileByteIndex charsPerPage: CARDINAL ¬ VM.PagesForBytes[1]/BYTES[CHAR]; ResetScanIndex: PUBLIC PROC[index: CARDINAL] RETURNS[success: BOOL] ~ { IF ~(index IN [tOrigin .. tLimit)) THEN { page: CARDINAL ~ index/charsPerPage; tOrigin ¬ tLimit ¬ page*charsPerPage; tMax ¬ 0; tEnded ¬ FALSE; IO.SetIndex[stream, streamOrigin + tOrigin]; FillBuffer[]}; tI ¬ index - tOrigin; IF tI >= tMax THEN FillBuffer[]; char ¬ tB[tI]; RETURN[TRUE]}; ErrorCode: TYPE ~ {number, string, char, atom, escape}; ScanError: PROC[code: ErrorCode, tokenIndex: CARDINAL] ~ { Inner: PROC [log: IO.STREAM] ~ { ErrorContext[log, SELECT code FROM $number => "invalid number", $string => "string unterminated or too long", $char => "invalid character", $atom => "invalid atom", $escape => "invalid escape sequence", ENDCASE => NIL, tokenIndex]; WriteCR[log]}; nErrors ¬ nErrors + 1; Logger[Inner]}; WriteCR: PROC[stream: IO.STREAM] = { stream.PutChar['\012]}; ErrorContext: PUBLIC PROC[ to: IO.STREAM, message: Rope.ROPE, tokenIndex: CARDINAL] ~ { saveIndex: StreamIndex ~ IO.GetIndex[stream]; origin: StreamIndex ~ streamOrigin + tokenIndex; start, lineIndex: StreamIndex ¬ origin; char: CHAR; n: [1..100]; FOR n IN [1..100] UNTIL lineIndex = 0 DO lineIndex ¬ lineIndex - 1; IO.SetIndex[stream, lineIndex]; char ¬ stream.GetChar[]; IF char = Ascii.CR OR char = Ascii.LF THEN EXIT; start ¬ lineIndex; ENDLOOP; IO.SetIndex[stream, start]; FOR n IN [1..100] DO char ¬ stream.GetChar[ ! IO.EndOfStream => EXIT]; SELECT char FROM Ascii.CR, Ascii.LF, Ascii.ControlZ => EXIT; ENDCASE => IO.PutChar[to, char]; ENDLOOP; WriteCR[to]; IO.SetIndex[stream, start]; UNTIL IO.GetIndex[stream] = origin DO char ¬ stream.GetChar[ ! IO.EndOfStream => EXIT]; IO.PutChar[to, IF char = Ascii.TAB THEN '\t ELSE ' ]; ENDLOOP; IO.PutF[to, "^ %g[%d]", IO.rope[message], IO.card[tokenIndex]]; WriteCR[to]; IO.SetIndex[stream, saveIndex]}; }. d MobScanner.mesa - derived from Compiler>Scanner.mesa Copyright Σ 1985, 1989, 1991, 1992 by Xerox Corporation. All rights reserved. Satterthwaite, February 4, 1986 2:23:17 pm PST Maxwell, August 11, 1983 2:22 pm Paul Rovner, September 22, 1983 9:49 pm Russ Atkinson (RRA) March 7, 1985 0:57:59 am PST Andy Litman May 30, 1988 8:15:33 pm PDT JKF July 22, 1989 3:39:26 pm PDT table installation scanner state numerical conversion character and string constants initialization/finalization error handling charsPerPage: CARDINAL _ VM.bytesPerPage/BYTES[CHAR]; so the sources can be the same in both worlds: ΚŸ–(cedarcode) style•NewlineDelimiter ™codešœ4™4Kšœ ΟeœC™NKšΟy+Πky™.Kšž ™ Kšž'™'KšœΟkœ ™0Kšž$Ÿ™'Kš œ™ K™—š  ˜ Kšœ œ œ  œ œ œ œ œ˜1Kšœ œ ˜ Kš œ œR œ˜rKšœ  œ˜Kšœ œ˜&Kšœ œΓ˜ΦKšœ œ ˜Kšœ œ œ˜Kš œ œ;˜CK˜—šΟn œ ˜Kš œ  œ ˜2Kš œ ˜Kš œ˜K˜—Kšœ™˜K˜K˜K˜K˜K˜š‘œ œ œ˜#K˜(K˜(Kšœ'˜'K˜-K˜—š ‘œ œ œ œ œ˜6K˜š œ œ ˜š œ œ œ& ˜5Kšœ  œ œ œ ˜4——Kš œ  œ ˜K˜——Kšœ ™ ˜Kšœ œ œ œΟc˜-K˜K˜Kš ‘œ œ œ œ œ œ˜*K˜Kšœ  œ˜Kš œ  œ œ œ œ œ˜7K˜Kšœ œ ˜K˜Kšœ  œ˜Kšœ  œ˜K˜K˜š‘ œ œ˜K˜Kš œ œ ˜š œ˜Kšœ œ ˜FKš œ œ  œ˜'K˜—Kš œ  œ œ ˜/K˜K˜K˜—Kšœ œ œ œ’˜/Kšœ œ’˜,Kšœ’˜8K˜Kšœ  œ’˜#Kšœ  œ’˜"K˜Kš‘œ œ œ˜K˜š‘ œ œ˜Kšœ  œ œ ˜Kš œ œ œ˜5Kšœ  œ œ˜'Kšœ  œ  œ œ˜*Kšœ+˜+K˜(Kšœ  œ˜K˜K˜—Kšœ œ’,˜9Kšœ œ’.˜;K˜š‘œ œ’#˜6Kš œ œ˜6K˜K˜—š‘ œ œ œ œ˜6Kš œ˜ š ˜š œ œ œ ˜ š œ ˜šœ œ’˜+Kš  œ œ œ œ œ œ˜FK˜Kš  œ œ œ œ œ ˜(—šœ’˜-š  œ œ œ œ ˜+Kš  œ œ œ œ œ œ˜FK˜Kš œ˜——š œ˜ Kš  œ œ œ œ œ œ˜FK˜——Kš œ˜—Kšœ;˜;Kš œ ˜˜K˜3˜8Kšœ œ˜š ˜K˜Kš œ œ˜&K˜š œ ˜š œ  œ  œ ˜(Kš œ œ˜)—Kš œ œ˜—Kš œ˜—K˜Kšœ( œ˜OKš œ œ ˜K˜—K˜3˜8Kšœ œ˜Kšœ œ œ˜Kšœ  œ œ˜š ˜K˜Kš œ œ˜&K˜š œ ˜š œ˜Kšœ  œ œ œ˜;—š œ  œ˜Kšœ œ œ œ˜7—Kš œ œ˜—Kš œ˜—K˜š œ œ˜ Kšœ* œ  œ˜AKšœ  œ˜š œ ˜$š œ( ˜.š œ œ  ˜Kš œ œ œ˜)K˜ š ˜Kš œ œ œ ˜-—Kš œ˜——Kš œ œ œ˜'Kš œ˜ ——K˜Kšœ( œ˜OKš œ œ ˜K˜—K˜/K˜˜Kšœ œ œ ˜,K˜—˜Kšœ œ˜Kšœ œ˜ Kšœ  œ œ˜š œ˜š œ  œ˜Kš  œ œ œ œ œ œ˜EK˜—š œ ˜˜Kš œ œ˜&K˜Kš œ  œ œ œ ˜"—Kš œ˜—š œ  œ˜Kšœ< œ˜G—K˜1Kš œ œ"˜0š ˜Kšœ  œ˜KšœI˜I—Kš œ˜—K˜Kšœ œ˜7Kšœ œ œ ˜'K˜—˜K˜ š œ  œ˜K˜Kš œ œ ˜7Kš œ œ ˜—Kšœ  œ˜š ˜Kšœ œ˜Kš  œ œ œ œ œ œ˜GK˜š œ ˜Kšœ œ  œ œ˜Kšœ œ œ œ˜Kš œ˜—Kš œ˜—K˜ K˜—˜K˜ š œ ˜˜K˜3Kšœ œ˜š ˜š œ œ˜Kš œ œ œ œ˜,—K˜š œ ˜šœ œ ˜K˜.˜Kšœ! œ  œ œ˜8—Kš œ˜—šœ œ ˜K˜.K˜2Kš œ˜—Kš œ˜—Kš œ˜—K˜ —Kš œ#˜*K˜——š œ˜ K˜Kš œ œ œ œ ˜&K˜ Kšœ˜K˜——š ˜Kšœ  œ œ˜BKšœ  œ˜˜ KšœY˜YKš œ œ œ’˜6K˜——Kš œ˜—K˜Kš œ˜K˜K˜——Kšœ™˜Kš‘œ œ œ  œ ˜=K˜K˜—Kšœ™˜Kšœ  œ˜K˜š ‘œ œ œ œ  œ˜