-- file ProtoScanner.mesa -- last modified by Satterthwaite, January 10, 1983 2:20 pm -- derived from Compiler>Scanner.mesa DIRECTORY Ascii: TYPE USING [BS, ControlZ, CR, FF, LF, NUL, TAB], CharIO: TYPE USING [PutChar, PutNumber, PutString], Environment: TYPE USING [charsPerWord, charsPerPage, wordsPerPage], FileStream: TYPE USING [FileByteIndex, EndOf, GetIndex, SetIndex], HashOps: TYPE USING [EnterString], P1: TYPE USING [Token, Value, nullValue], ParseTable: TYPE USING [ HashIndex, HashTableRef, IndexTableRef, ScanTableRef, TableRef, VocabularyRef, endMarker, tokenID, tokenSTR], Stream: TYPE USING [Handle, GetBlock, GetChar], Strings: TYPE USING [String, SubStringDescriptor, AppendString]; Scanner: PROGRAM IMPORTS CharIO, FileStream, HashOps, Stream, Strings EXPORTS P1 = { OPEN ParseTable; -- table installation tablePtr: ParseTable.TableRef; hashTab: HashTableRef; scanTab: ScanTableRef; vocab: VocabularyRef; vocabIndex: IndexTableRef; InstallScanTable: PUBLIC PROC [base: ParseTable.TableRef] = { tablePtr ← base; hashTab ← @tablePtr[tablePtr.scanTable.hashTab]; scanTab ← @tablePtr[tablePtr.scanTable.scanTab]; vocab ← LOOPHOLE[@tablePtr[tablePtr.scanTable.vocabBody]]; vocabIndex ← @tablePtr[tablePtr.scanTable.vocabIndex]}; -- scanner state stream: Stream.Handle ← NIL; -- the input stream streamOrigin: FileStream.FileByteIndex; Logger: PROC [PROC [log: Stream.Handle]] ← NIL; zone: UNCOUNTED ZONE ← NIL; textPages: NAT ~ 6; textWords: NAT ~ textPages*Environment.wordsPerPage; textChars: NAT ~ textWords*Environment.charsPerWord; TextBuffer: TYPE ~ PACKED ARRAY [0..textChars) OF CHAR; tB: LONG POINTER TO TextBuffer; tI, tMax: [0..textChars]; tOrigin, tLimit: CARDINAL; tEnded: BOOL; FillBuffer: PROC ~ { tOrigin ← tLimit; IF tEnded THEN tMax ← 0 ELSE { tMax ← stream.GetBlock[[LOOPHOLE[tB], 0, textChars]].bytesTransferred; IF tMax < textChars THEN tEnded ← TRUE; tLimit ← tOrigin + tMax}; IF tMax = 0 THEN {tB[0] ← Ascii.NUL; tMax ← 1}; tI ← 0}; buffer: Strings.String ← NIL; -- token assembly area iMax: CARDINAL; -- iMax = buffer.maxlength desc: Strings.SubStringDescriptor; -- initial buffer segment nTokens: NAT; -- token count nErrors: NAT; -- lexical errors BufferOverflow: ERROR ~ CODE; ExpandBuffer: PROC ~ { oldBuffer: Strings.String ← buffer; IF oldBuffer.length > 2000 THEN ERROR BufferOverflow; buffer ← zone.NEW[StringBody[2*oldBuffer.length]]; Strings.AppendString[buffer, oldBuffer]; iMax ← buffer.length ← buffer.maxlength; zone.FREE[@oldBuffer]; desc.base ← buffer}; char: CHAR; -- current (most recently scanned) character qDot: BOOL; -- used to resolved decimal point vs. interval NextChar: PROC ~ { -- also expanded inline within Atom IF (tI←tI+1) = tMax THEN FillBuffer[]; char ← tB[tI]}; Atom: PUBLIC PROC RETURNS [token: P1.Token] ~ { OPEN token; DO WHILE char IN [Ascii.NUL..' ] DO SELECT char FROM Ascii.NUL => { -- ↑@↑@ is Tioga escape seq IF (tI←tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]}; char ← tB[tI]; IF char = Ascii.NUL THEN GO TO EndFile}; Ascii.ControlZ => -- ↑Z is Bravo escape char UNTIL char = Ascii.CR DO IF (tI←tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]}; char ← tB[tI]; ENDLOOP; ENDCASE => { IF (tI←tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]}; char ← tB[tI]}; ENDLOOP; index ← tOrigin + tI; value ← P1.nullValue; SELECT char FROM 'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm, 'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z => { i: CARDINAL ← 0; DO buffer[i] ← char; IF (tI←tI+1) = tMax THEN FillBuffer[]; char ← tB[tI]; SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => IF (i ← i+1) >= iMax THEN ExpandBuffer[]; ENDCASE => EXIT; ENDLOOP; desc.length ← i+1; class ← tokenID; value.r ← HashOps.EnterString[@desc]; GO TO GotNext}; 'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M, 'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => { i: CARDINAL ← 0; uId: BOOL ← TRUE; first, last: NAT ← char.ORD; DO buffer[i] ← char; IF (tI←tI+1) = tMax THEN FillBuffer[]; char ← tB[tI]; SELECT char FROM IN ['A..'Z] => { last ← char.ORD; IF (i ← i+1) >= iMax THEN ExpandBuffer[]}; IN ['a..'z], IN ['0..'9] => { uId ← FALSE; IF (i ← i+1) >= iMax THEN ExpandBuffer[]}; ENDCASE => EXIT; ENDLOOP; i ← i+1; IF uId THEN { h: HashIndex ← ((first*128-first) + last) MOD HashIndex.LAST + 1; j, s1, s2: CARDINAL; WHILE (j ← hashTab[h].symbol) # 0 DO IF vocabIndex[j]-(s2←vocabIndex[j-1]) = i THEN FOR s1 IN [0 .. i) DO IF buffer[s1] # vocab.text[s2] THEN EXIT; s2 ← s2+1; REPEAT FINISHED => {class ← j; GO TO GotNext}; ENDLOOP; IF (h ← hashTab[h].link) = 0 THEN EXIT; ENDLOOP}; desc.length ← i; class ← tokenID; value.r ← HashOps.EnterString[@desc]; GO TO GotNext}; ',, ';, ':, '←, '#, '~, '+, '*, '/, '↑, '@, '!, '=, '., '(, '), '[, '], '{, '} => { class ← scanTab[char]; GO TO GetNext}; '" => { i: CARDINAL ← 0; valid: BOOL; advance: BOOL ← TRUE; DO IF advance THEN { IF (tI←tI+1) = tMax THEN {IF tEnded THEN GO TO EOFEnd; FillBuffer[]}; char ← tB[tI]}; SELECT char FROM '" => { IF (tI←tI+1) = tMax THEN FillBuffer[]; char ← tB[tI]; IF char # '" THEN GO TO QuoteEnd}; ENDCASE; IF i >= iMax THEN ExpandBuffer[ ! BufferOverflow => {ScanError[string, index]; i ← 0; CONTINUE}]; [buffer[i], valid, advance] ← Escape[]; i ← i+1; IF ~valid THEN ScanError[$escape, tOrigin + tI]; REPEAT QuoteEnd => NULL; EOFEnd => {ScanError[$string, index]; FillBuffer[]; char ← tB[tI]}; ENDLOOP; desc.length ← i; value.r ← HashOps.EnterString[@desc]; class ← tokenSTR; GO TO GotNext}; '- => { NextChar[]; IF char # '- THEN { class ← scanTab['-]; IF class = 0 THEN ScanError[char, index-1]; GO TO GotNext}; char ← Ascii.NUL; DO pChar: CHAR ~ char; IF (tI←tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]}; char ← tB[tI]; SELECT char FROM '- => IF pChar = '- THEN EXIT; Ascii.CR => EXIT; ENDCASE; ENDLOOP; NextChar[]}; '< => { NextChar[]; SELECT char FROM '< => { state: {plain, leftBrocket, rightBrocket} ← $plain; nest: CARDINAL ← 1; DO IF (tI←tI+1) = tMax THEN { IF tEnded THEN GO TO EndFile; FillBuffer[]}; char ← tB[tI]; SELECT char FROM '> => SELECT state FROM $plain, $leftBrocket => state ← $rightBrocket; $rightBrocket => { state ← $plain; nest ← nest - 1; IF nest = 0 THEN EXIT}; ENDCASE; '< => SELECT state FROM $plain, $rightBrocket => state ← $leftBrocket; $leftBrocket => {state ← $plain; nest ← nest + 1}; ENDCASE; ENDCASE => state ← $plain; ENDLOOP; NextChar[]}; ENDCASE => ScanError[$char, index]}; ENDCASE => { class ← scanTab[char]; IF class # 0 THEN GO TO GetNext; NextChar[]; ScanError[$char, index]}; REPEAT GetNext => {IF (tI←tI+1) = tMax THEN FillBuffer[]; char ← tB[tI]}; GotNext => NULL; EndFile => { class ← endMarker; index ← tOrigin + (tI-1); value ← P1.nullValue; UNTIL tEnded DO FillBuffer[] ENDLOOP; -- flush stream FillBuffer[]; char ← tB[tI]}; ENDLOOP; nTokens ← nTokens + 1; RETURN}; -- numerical conversion Digit: ARRAY CHAR ['0..'9] OF [0..9] ~ [0,1,2,3,4,5,6,7,8,9]; -- character and string constants escapeMark: CHAR ~ '\\; Escape: PROC RETURNS [c: CHAR, valid, advance: BOOL←TRUE] ~ { c ← char; IF c = escapeMark THEN { NextChar[]; SELECT char FROM 'n, 'N => c ← Ascii.CR; 'r, 'R => c ← Ascii.CR; 'l, 'L => c ← Ascii.LF; 't, 'T => c ← Ascii.TAB; 'b, 'B => c ← Ascii.BS; 'f, 'F => c ← Ascii.FF; '', '", escapeMark => c ← char; IN ['0 .. '7] => { nc, v: CARDINAL ← 0; DO IF ~(char IN ['0..'7]) THEN {valid ← advance ← FALSE; EXIT}; v ← 8*v + Digit[char]; IF (nc ← nc+1) = 3 THEN EXIT; NextChar[]; ENDLOOP; IF v > 377b THEN {valid ← FALSE; v ← 0}; c ← VAL[v]}; ENDCASE => valid ← advance ← FALSE}; RETURN}; -- initialization/finalization ScanInit: PUBLIC PROC [ source: Stream.Handle, scratchZone: UNCOUNTED ZONE, logger: PROC [PROC [log: Stream.Handle]]] ~ { zone ← scratchZone; stream ← source; Logger ← logger; IF buffer = NIL THEN buffer ← zone.NEW[StringBody[256]]; iMax ← buffer.length ← buffer.maxlength; desc.base ← buffer; desc.offset ← 0; streamOrigin ← FileStream.GetIndex[stream]; tB ← zone.NEW[TextBuffer]; tOrigin ← tLimit ← 0; tMax ← 0; tEnded ← FALSE; FillBuffer[]; char ← tB[tI]; qDot ← FALSE; nTokens ← nErrors ← 0}; ScanReset: PUBLIC PROC RETURNS [NAT, NAT] ~ { stream ← NIL; Logger ← NIL; zone.FREE[@tB]; IF buffer # NIL THEN zone.FREE[@buffer]; zone ← NIL; RETURN [nTokens, nErrors]}; -- error handling StreamIndex: TYPE ~ FileStream.FileByteIndex; ResetScanIndex: PUBLIC PROC [index: CARDINAL] RETURNS [success: BOOL] ~ { IF ~(index IN [tOrigin .. tLimit)) THEN { page: CARDINAL ~ index/Environment.charsPerPage; tOrigin ← tLimit ← page*Environment.charsPerPage; tMax ← 0; tEnded ← FALSE; FileStream.SetIndex[stream, streamOrigin + tOrigin]; FillBuffer[]}; tI ← index - tOrigin; IF tI >= tMax THEN FillBuffer[]; char ← tB[tI]; RETURN [TRUE]}; ErrorCode: TYPE ~ {number, string, char, atom, escape}; ScanError: PROC [code: ErrorCode, tokenIndex: CARDINAL] ~ { Inner: PROC [log: Stream.Handle] ~ { ErrorContext[log, SELECT code FROM $number => "invalid number"L, $string => "string unterminated or too long"L, $char => "invalid character"L, $atom => "invalid atom"L, $escape => "invalid escape sequence"L, ENDCASE => NIL, tokenIndex]; CharIO.PutChar[log, '\n]}; nErrors ← nErrors + 1; Logger[Inner]}; ErrorContext: PUBLIC PROC [ to: Stream.Handle, message: Strings.String, tokenIndex: CARDINAL] ~ { OPEN CharIO; saveIndex: StreamIndex ~ FileStream.GetIndex[stream]; origin: StreamIndex ~ streamOrigin + tokenIndex; start, lineIndex: StreamIndex ← origin; char: CHAR; n: [1..100]; FOR n IN [1..100] UNTIL lineIndex = 0 DO lineIndex ← lineIndex - 1; FileStream.SetIndex[stream, lineIndex]; IF stream.GetChar[] = Ascii.CR THEN EXIT; start ← lineIndex; ENDLOOP; FileStream.SetIndex[stream, start]; FOR n IN [1..100] UNTIL FileStream.EndOf[stream] DO char ← stream.GetChar[]; SELECT char FROM Ascii.CR, Ascii.ControlZ => EXIT; ENDCASE => PutChar[to, char]; ENDLOOP; CharIO.PutChar[to, Ascii.CR]; FileStream.SetIndex[stream, start]; UNTIL FileStream.GetIndex[stream] = origin OR FileStream.EndOf[stream] DO char ← stream.GetChar[]; PutChar[to, IF char = Ascii.TAB THEN '\t ELSE ' ]; ENDLOOP; PutString[to, "↑ "L]; PutString[to, message]; PutString[to, " ["L]; PutNumber[to, tokenIndex, [base~10, zerofill~FALSE, unsigned~TRUE, columns~0]]; PutChar[to, ']]; CharIO.PutChar[to, '\n]; FileStream.SetIndex[stream, saveIndex]}; }.