-- Lexical scanner module (Scanner, exporting P1).
-- Reads characters from an IO.STREAM through a fixed-size text buffer and
-- delivers P1.Token values classified with the installed ParseTable tables.

DIRECTORY
  Ascii: TYPE USING [BS, ControlZ, CR, FF, LF, NUL, TAB],
  Basics: TYPE USING [charsPerWord, RawBytes],
  ConvertUnsafe: TYPE USING [SubString],
  IO: TYPE USING [card, EndOf, GetIndex, GetChar, PutChar, PutF, rope, SetIndex, STREAM, UnsafeGetBlock],
  HashOps: TYPE USING [EnterString],
  P1: TYPE USING [Token, Value, nullValue],
  ParseTable: TYPE USING [HashIndex, HashTableRef, IndexTableRef, ScanTableRef, TableRef, VocabularyRef, endMarker, tokenID, tokenSTR],
  RefText: TYPE USING [Append],
  Rope: TYPE USING [ROPE],
  VM: TYPE USING [wordsPerPage];

Scanner: PROGRAM
    IMPORTS HashOps, IO, RefText
    EXPORTS P1 = {
  OPEN ParseTable;

  -- table installation

  tablePtr: ParseTable.TableRef;
  hashTab: HashTableRef;
  scanTab: ScanTableRef;
  vocab: VocabularyRef;
  vocabIndex: IndexTableRef;

  -- Records the parse table base and derives the hash table, scan table,
  -- vocabulary text and vocabulary index from the offsets in its scanTable.
  InstallScanTable: PUBLIC PROC[base: ParseTable.TableRef] = {
    tablePtr _ base;
    hashTab _ @tablePtr[tablePtr.scanTable.hashTab];
    scanTab _ @tablePtr[tablePtr.scanTable.scanTab];
    vocab _ LOOPHOLE[@tablePtr[tablePtr.scanTable.vocabBody]];
    vocabIndex _ @tablePtr[tablePtr.scanTable.vocabIndex]};

  -- scanner state

  stream: IO.STREAM _ NIL; -- the input stream
  streamOrigin: StreamIndex;
  Logger: PROC[PROC [log: IO.STREAM]] _ NIL;

  textPages: NAT ~ 6;
  textWords: NAT ~ textPages*VM.wordsPerPage;
  textChars: NAT ~ textWords*Basics.charsPerWord;
  TextBuffer: TYPE ~ PACKED ARRAY [0..textChars) OF CHAR;
  tB: REF TextBuffer; -- current window of input text
  tI, tMax: [0..textChars]; -- scan position in tB and count of valid characters
  tOrigin, tLimit: CARDINAL; -- offsets (relative to streamOrigin) of the window start and end
  tEnded: BOOL; -- TRUE once a read has returned fewer than textChars characters

  -- Loads the next window of text into tB and resets tI to 0.
  -- A short read marks the stream as ended.  When no character is available
  -- a single NUL is planted so that the scanner always has a character to read.
  FillBuffer: PROC ~ {
    tOrigin _ tLimit;
    IF tEnded THEN tMax _ 0
    ELSE {
      tMax _ stream.UnsafeGetBlock [[LOOPHOLE[tB, LONG POINTER TO Basics.RawBytes], 0, textChars ]].nBytesRead;
      IF tMax < textChars THEN tEnded _ TRUE;
      tLimit _ tOrigin + tMax};
    IF tMax = 0 THEN {tB[0] _ Ascii.NUL; tMax _ 1};
    tI _ 0};

  buffer: REF TEXT _ NIL; -- token assembly area
  iMax: CARDINAL; -- iMax = buffer.maxLength
  desc: ConvertUnsafe.SubString; -- initial buffer segment
  nTokens: NAT; -- token count
  nErrors: NAT; -- lexical errors
  BufferOverflow: ERROR ~ CODE;

  -- Replaces the token assembly buffer with one of twice the length, keeping
  -- its contents and re-pointing desc.base at the new text.
  -- Raises BufferOverflow when the current length already exceeds 2000.
  ExpandBuffer: PROC ~ {
    oldBuffer: REF TEXT _ buffer;
    IF oldBuffer.length > 2000 THEN ERROR BufferOverflow;
    buffer _ NEW[TEXT[2*oldBuffer.length]];
    desc.base _ LOOPHOLE[buffer, LONG STRING];
    buffer _ RefText.Append[buffer, oldBuffer];
    iMax _ buffer.length _ buffer.maxLength;
    oldBuffer _ NIL};

  char: CHAR; -- current (most recently scanned) character
  qDot: BOOL; -- used to resolve decimal point vs. interval

  -- Advances to the next input character, refilling the buffer when needed.
  NextChar: PROC ~ { -- also expanded inline within NextToken
    IF (tI_tI+1) = tMax THEN FillBuffer[];
    char _ tB[tI]};

  -- Scans and returns the next token.
  -- token.index is the offset of the token's first character, token.class its
  -- terminal class and token.value.r the entered string for identifiers and
  -- string literals.  At end of input the class is endMarker.
  -- BufferOverflow from an over-long identifier is not caught here.
  NextToken: PUBLIC PROC RETURNS[token: P1.Token] ~ {
    OPEN token;
    DO
      -- skip blanks and control characters
      WHILE char IN [Ascii.NUL..' ] DO
        SELECT char FROM
          Ascii.NUL => { -- ^@^@ is Tioga escape seq
            IF (tI_tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]};
            char _ tB[tI];
            IF char = Ascii.NUL THEN GO TO EndFile};
          Ascii.ControlZ => -- ^Z is Bravo escape char
            UNTIL char = Ascii.CR DO
              IF (tI_tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]};
              char _ tB[tI];
              ENDLOOP;
          ENDCASE => {
            IF (tI_tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]};
            char _ tB[tI]};
        ENDLOOP;
      index _ tOrigin + tI; value _ P1.nullValue;
      SELECT char FROM

        -- identifier starting with a lower case letter: never a reserved word
        'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm,
        'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z => {
          i: CARDINAL _ 0;
          DO
            buffer[i] _ char;
            IF (tI_tI+1) = tMax THEN FillBuffer[];
            char _ tB[tI];
            SELECT char FROM
              IN ['a..'z], IN ['A..'Z], IN ['0..'9] =>
                IF (i _ i+1) >= iMax THEN ExpandBuffer[];
              ENDCASE => EXIT;
            ENDLOOP;
          desc.length _ i+1;
          class _ tokenID; value.r _ HashOps.EnterString[desc];
          GO TO GotNext};

        -- identifier starting with an upper case letter; if it is all upper
        -- case it is first looked up in the vocabulary through hashTab
        'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M,
        'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => {
          i: CARDINAL _ 0;
          uId: BOOL _ TRUE; -- TRUE while only upper case letters have been seen
          first, last: NAT _ char.ORD; -- codes of first and latest upper case letter, used for hashing
          DO
            buffer[i] _ char;
            IF (tI_tI+1) = tMax THEN FillBuffer[];
            char _ tB[tI];
            SELECT char FROM
              IN ['A..'Z] => {
                last _ char.ORD; IF (i _ i+1) >= iMax THEN ExpandBuffer[]};
              IN ['a..'z], IN ['0..'9] => {
                uId _ FALSE; IF (i _ i+1) >= iMax THEN ExpandBuffer[]};
              ENDCASE => EXIT;
            ENDLOOP;
          i _ i+1;
          IF uId THEN {
            h: HashIndex _ ((first*128-first) + last) MOD HashIndex.LAST + 1;
            j, s1, s2: CARDINAL;
            -- walk the hash chain comparing length first, then text
            WHILE (j _ hashTab[h].symbol) # 0 DO
              IF vocabIndex[j]-(s2_vocabIndex[j-1]) = i THEN
                FOR s1 IN [0 .. i) DO
                  IF buffer[s1] # vocab.text[s2] THEN EXIT;
                  s2 _ s2+1;
                  REPEAT
                    FINISHED => {class _ j; GO TO GotNext};
                  ENDLOOP;
              IF (h _ hashTab[h].link) = 0 THEN EXIT;
              ENDLOOP};
          desc.length _ i;
          class _ tokenID; value.r _ HashOps.EnterString[desc];
          GO TO GotNext};

        -- single character tokens classified directly by scanTab
        ',, ';, ':, '_, '#, '~, '+, '*, '/, '^, '@, '!, '=, '.,
        '(, '), '[, '], '{, '} => {
          class _ scanTab[char]; GO TO GetNext};

        -- string literal; a doubled quote inside the literal stands for one quote
        '" => {
          i: CARDINAL _ 0;
          valid: BOOL;
          advance: BOOL _ TRUE; -- FALSE when Escape left an unconsumed character in char
          DO
            IF advance THEN {
              IF (tI_tI+1) = tMax THEN {IF tEnded THEN GO TO EOFEnd; FillBuffer[]};
              char _ tB[tI]};
            SELECT char FROM
              '" => {
                IF (tI_tI+1) = tMax THEN FillBuffer[];
                char _ tB[tI];
                IF char # '" THEN GO TO QuoteEnd};
              ENDCASE;
            -- on overflow report the error and restart assembly at position 0
            IF i >= iMax THEN
              ExpandBuffer[ ! BufferOverflow => {ScanError[$string, index]; i _ 0; CONTINUE}];
            [buffer[i], valid, advance] _ Escape[];
            i _ i+1;
            IF ~valid THEN ScanError[$escape, tOrigin + tI];
            REPEAT
              QuoteEnd => NULL;
              EOFEnd => {ScanError[$string, index]; FillBuffer[]; char _ tB[tI]};
            ENDLOOP;
          desc.length _ i;
          value.r _ HashOps.EnterString[desc];
          class _ tokenSTR; GO TO GotNext};

        -- a single dash is a token; a double dash starts a comment that ends
        -- at the next double dash or at CR
        '- => {
          NextChar[];
          IF char # '- THEN {
            class _ scanTab['-];
            -- report at the token's own index, as the other invalid character
            -- sites do; index-1 pointed before the token and underflowed at 0
            IF class = 0 THEN ScanError[$char, index];
            GO TO GotNext};
          char _ Ascii.NUL; -- so the opening dash is not taken as part of a closing pair
          DO
            pChar: CHAR ~ char;
            IF (tI_tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]};
            char _ tB[tI];
            SELECT char FROM
              '- => IF pChar = '- THEN EXIT;
              Ascii.CR => EXIT;
              ENDCASE;
            ENDLOOP;
          NextChar[]};

        -- doubled left brocket starts a comment closed by a doubled right
        -- brocket; such comments nest.  A single left brocket is an error.
        '< => {
          NextChar[];
          SELECT char FROM
            '< => {
              state: {plain, leftBrocket, rightBrocket} _ $plain;
              nest: CARDINAL _ 1;
              DO
                IF (tI_tI+1) = tMax THEN {
                  IF tEnded THEN GO TO EndFile; FillBuffer[]};
                char _ tB[tI];
                SELECT char FROM
                  '> =>
                    SELECT state FROM
                      $plain, $leftBrocket => state _ $rightBrocket;
                      $rightBrocket => {
                        state _ $plain; nest _ nest - 1; IF nest = 0 THEN EXIT};
                      ENDCASE;
                  '< =>
                    SELECT state FROM
                      $plain, $rightBrocket => state _ $leftBrocket;
                      $leftBrocket => {state _ $plain; nest _ nest + 1};
                      ENDCASE;
                  ENDCASE => state _ $plain;
                ENDLOOP;
              NextChar[]};
            ENDCASE => ScanError[$char, index]};

        -- anything else: accept if scanTab has a class for it, otherwise
        -- report an invalid character and keep scanning
        ENDCASE => {
          class _ scanTab[char];
          IF class # 0 THEN GO TO GetNext;
          NextChar[];
          ScanError[$char, index]};
      REPEAT
        GetNext => {IF (tI_tI+1) = tMax THEN FillBuffer[]; char _ tB[tI]};
        GotNext => NULL;
        EndFile => {
          class _ endMarker; index _ tOrigin + (tI-1); value _ P1.nullValue;
          UNTIL tEnded DO FillBuffer[] ENDLOOP; -- flush stream
          FillBuffer[]; char _ tB[tI]};
      ENDLOOP;
    nTokens _ nTokens + 1;
    RETURN};

  -- numerical conversion

  Digit: ARRAY CHAR ['0..'9] OF [0..9] ~ [0,1,2,3,4,5,6,7,8,9];

  -- character and string constants

  escapeMark: CHAR ~ '\\;

  -- Interprets the current character, which may begin an escape sequence.
  -- Returns the character denoted (c), whether the sequence was well formed
  -- (valid) and whether the caller should advance past the current character
  -- before continuing (advance).  An octal escape must have three octal digits
  -- and a value of at most 377b.
  Escape: PROC RETURNS[c: CHAR, valid, advance: BOOL_TRUE] ~ {
    c _ char;
    IF c = escapeMark THEN {
      NextChar[];
      SELECT char FROM
        'n, 'N => c _ Ascii.CR;
        'r, 'R => c _ Ascii.CR;
        'l, 'L => c _ Ascii.LF;
        't, 'T => c _ Ascii.TAB;
        'b, 'B => c _ Ascii.BS;
        'f, 'F => c _ Ascii.FF;
        '', '", escapeMark => c _ char;
        IN ['0 .. '7] => {
          nc, v: CARDINAL _ 0; -- digits consumed and value accumulated so far
          DO
            IF ~(char IN ['0..'7]) THEN {valid _ advance _ FALSE; EXIT};
            v _ 8*v + Digit[char];
            IF (nc _ nc+1) = 3 THEN EXIT;
            NextChar[];
            ENDLOOP;
          IF v > 377b THEN {valid _ FALSE; v _ 0};
          c _ VAL[v]};
        ENDCASE => valid _ advance _ FALSE};
    RETURN};

  -- initialization/finalization

  -- Prepares the scanner to read from source, reporting errors through logger.
  -- Allocates the assembly buffer on first use, remembers the stream's current
  -- index as the origin for token indices, loads the first window of text and
  -- clears the token and error counts.
  ScanInit: PUBLIC PROC[
      source: IO.STREAM, logger: PROC [PROC [log: IO.STREAM]]] ~ {
    stream _ source; Logger _ logger;
    IF buffer = NIL THEN buffer _ NEW[TEXT[256]];
    desc.base _ LOOPHOLE[buffer, LONG STRING]; desc.offset _ 0;
    iMax _ buffer.length _ buffer.maxLength;
    streamOrigin _ IO.GetIndex[stream];
    tB _ NEW[TextBuffer];
    tOrigin _ tLimit _ 0; tMax _ 0; tEnded _ FALSE;
    FillBuffer[]; char _ tB[tI];
    qDot _ FALSE;
    nTokens _ nErrors _ 0};

  -- Returns the number of tokens delivered and the number of lexical errors.
  ScanStats: PUBLIC PROC RETURNS[NAT, NAT] ~ {
    RETURN[nTokens, nErrors]};

  -- Releases the buffers and drops the references to the stream and logger.
  ScanReset: PUBLIC PROC ~ {
    IF buffer # NIL THEN FREE[@buffer];
    IF tB # NIL THEN FREE[@tB];
    desc.base _ NIL;
    stream _ NIL; Logger _ NIL};

  StreamIndex: TYPE ~ INT; -- FileStream.FileByteIndex
  charsPerPage: CARDINAL = Basics.charsPerWord*VM.wordsPerPage;

-- scan position reset and error handling

  -- Repositions the scanner so that the character at offset index (relative
  -- to the stream origin) becomes the current character.  When index lies
  -- outside the window currently held in the text buffer, the stream is
  -- repositioned to the start of the page containing index and the buffer is
  -- reloaded.  Always returns TRUE.
  ResetScanIndex: PUBLIC PROC[index: CARDINAL] RETURNS[success: BOOL] ~ {
    outside: BOOL ~ index < tOrigin OR index >= tLimit;
    IF outside THEN {
      pageStart: CARDINAL ~ (index/charsPerPage)*charsPerPage;
      tOrigin _ tLimit _ pageStart;
      tMax _ 0;
      tEnded _ FALSE;
      IO.SetIndex[stream, streamOrigin + tOrigin];
      FillBuffer[]};
    tI _ index - tOrigin;
    IF tI >= tMax THEN FillBuffer[];
    char _ tB[tI];
    success _ TRUE;
    RETURN};

  ErrorCode: TYPE ~ {number, string, char, atom, escape};

  -- Counts one lexical error and hands the logger a procedure that writes the
  -- source context and the message for code, followed by a newline.
  ScanError: PROC[code: ErrorCode, tokenIndex: CARDINAL] ~ {
    text: Rope.ROPE ~ SELECT code FROM
      $number => "invalid number",
      $string => "string unterminated or too long",
      $char => "invalid character",
      $atom => "invalid atom",
      $escape => "invalid escape sequence",
      ENDCASE => NIL;
    Report: PROC [log: IO.STREAM] ~ {
      ErrorContext[log, text, tokenIndex];
      IO.PutChar[log, '\n]};
    nErrors _ nErrors + 1;
    Logger[Report]};

  -- Writes to the stream `to` the source line containing tokenIndex, then a
  -- second line with a caret under the token position followed by message and
  -- the index.  At most 100 characters are examined before the token and at
  -- most 100 characters of the line are echoed.  The input stream position is
  -- restored before returning.
  ErrorContext: PUBLIC PROC[
      to: IO.STREAM, message: Rope.ROPE, tokenIndex: CARDINAL] ~ {
    resumeAt: StreamIndex ~ IO.GetIndex[stream];
    errorAt: StreamIndex ~ streamOrigin + tokenIndex;
    lineStart: StreamIndex _ errorAt;
    probe: StreamIndex _ errorAt;
    c: CHAR;
    -- search backwards for the CR that ends the preceding line
    THROUGH [1..100] UNTIL probe = 0 DO
      probe _ probe - 1;
      IO.SetIndex[stream, probe];
      IF stream.GetChar[] = Ascii.CR THEN EXIT;
      lineStart _ probe;
      ENDLOOP;
    -- echo the line up to its terminator
    IO.SetIndex[stream, lineStart];
    THROUGH [1..100] UNTIL IO.EndOf[stream] DO
      c _ stream.GetChar[];
      IF c = Ascii.CR OR c = Ascii.ControlZ THEN EXIT;
      IO.PutChar[to, c];
      ENDLOOP;
    IO.PutChar[to, Ascii.CR];
    -- pad up to the token column, keeping tabs so that the caret lines up
    IO.SetIndex[stream, lineStart];
    UNTIL IO.GetIndex[stream] = errorAt OR IO.EndOf[stream] DO
      c _ stream.GetChar[];
      IO.PutChar[to, IF c = Ascii.TAB THEN '\t ELSE ' ];
      ENDLOOP;
    IO.PutF[to, "^ %g[%d]\n", IO.rope[message], IO.card[tokenIndex]];
    IO.SetIndex[stream, resumeAt]};

  }.

€ProtoScanner.mesa - derived from Compiler>Scanner.mesa Copyright c 1985 by Xerox Corporation. All rights reserved. 
Satterthwaite, February 4, 1986 2:23:17 pm PST Maxwell, August 11, 1983 2:22 pm Paul Rovner, September 22, 1983 9:49 pm Russ Atkinson (RRA) March 7, 1985 0:57:59 am PST table installation scanner state numerical conversion character and string constants initialization/finalization error handling ΚΏ˜codešœ6™6Kšœ Οmœ1™