-- File PackScanner.mesa -- Last modified by Satterthwaite on May 30, 1980 11:32 AM -- Last modified by Schmidt on September 18, 1980 1:59 PM -- Last modified by Lewis on 2-Apr-81 19:11:27 -- Last modified by Levin and Satterthwaite on July 6, 1982 4:32 pm -- Derived from Compiler>Scanner.Mesa DIRECTORY CharIO USING [CR, TAB, PutChar, PutNumber, PutString], Inline USING [LowHalf], PackEnviron USING [CharsPerWord, PageSize], P1: FROM "PackParseDefs" USING [Token], ParseTable USING [ HashIndex, TSymbol, VocabHashEntry, TableRef, tokenID, TrueEndOfFile], Streams USING [Ended, Handle, GetIndex, GetBlock, SetIndex, GetByte, End], LongStorage USING [FreeString, FreePages, Pages, Prune, String], Strings USING [AppendString, SubStringDescriptor, String], SymTabOps USING [EnterString]; Scanner: PROGRAM IMPORTS CharIO, Inline, Streams, LongStorage, Strings, SymTabOps EXPORTS P1 = BEGIN OPEN ParseTable; hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry; scanTab: LONG POINTER TO ARRAY CHARACTER [40C..177C] OF TSymbol; vocab: Strings.String; vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL; NUL: CHARACTER = 0C; CR: CHARACTER = CharIO.CR; ControlZ: CHARACTER = 32C; -- Bravo escape char stream: Streams.Handle; -- the input stream streamOrigin: LONG CARDINAL; errorStream: Streams.Handle; -- the error stream TextPages: CARDINAL = 6; TextWords: CARDINAL = TextPages * PackEnviron.PageSize; TextChars: CARDINAL = TextWords * PackEnviron.CharsPerWord; tB: LONG POINTER TO PACKED ARRAY [0..TextChars) OF CHARACTER; tI, tMax: [0..TextChars]; tOrigin, tLimit: CARDINAL; tEnded: BOOLEAN; FillBuffer: PROC = { words: [0..TextWords]; bytes: [0..PackEnviron.CharsPerWord); tOrigin ← tLimit; IF tEnded THEN tMax ← 0 ELSE { words ← Streams.GetBlock[stream, tB, TextWords]; bytes ← Inline.LowHalf[Streams.GetIndex[stream]] MOD PackEnviron.CharsPerWord; IF bytes # 0 THEN words ← words-1; tMax ← words*PackEnviron.CharsPerWord + bytes; IF tMax < TextChars THEN tEnded ← TRUE; tLimit ← tOrigin + tMax}; IF tMax = 0 THEN {tB[0] ← NUL; tMax ← 1}; tI ← 0}; buffer: Strings.String ← NIL; -- token assembly area iMax: CARDINAL; -- iMax = buffer.maxlength desc: Strings.SubStringDescriptor; -- initial buffer segment nTokens: CARDINAL; -- token count nErrors: CARDINAL; -- lexical errors BufferOverflow: ERROR = CODE; ExpandBuffer: PROC = { oldBuffer: Strings.String ← buffer; IF oldBuffer.length > 2000 THEN ERROR BufferOverflow; buffer ← LongStorage.String[2*oldBuffer.length]; Strings.AppendString[buffer, oldBuffer]; iMax ← buffer.length ← buffer.maxlength; LongStorage.FreeString[oldBuffer]; desc.base ← buffer}; char: CHARACTER; -- current (most recently scanned) character NextChar: PROC = { -- also expanded inline within Atom IF (tI←tI+1) = tMax THEN FillBuffer[]; char ← tB[tI]}; Atom: PUBLIC PROC RETURNS [token: P1.Token] = { OPEN token; DO WHILE char IN [NUL..' ] DO SELECT char FROM ControlZ => UNTIL char = CR DO IF (tI←tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]}; char ← tB[tI]; ENDLOOP; ENDCASE; IF (tI←tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]}; char ← tB[tI]; ENDLOOP; index ← tOrigin + tI; value ← 0; SELECT char FROM 'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm, 'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z => { i: CARDINAL; i ← 0; DO buffer[i] ← char; IF (tI←tI+1) = tMax THEN FillBuffer[]; char ← tB[tI]; SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => IF (i ← i+1) >= iMax THEN ExpandBuffer[]; ENDCASE => EXIT; ENDLOOP; desc.length ← i+1; class ← tokenID; value ← SymTabOps.EnterString[@desc]; GO TO GotNext}; 'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M, 'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => { first, last: CARDINAL; uId: BOOLEAN; i, j: CARDINAL; h: HashIndex; s1, s2: CARDINAL; i ← 0; uId ← TRUE; first ← last ← char-0C; DO buffer[i] ← char; IF (tI←tI+1) = tMax THEN FillBuffer[]; char ← tB[tI]; SELECT char FROM IN ['A..'Z] => { last ← char-0C; IF (i ← i+1) >= iMax THEN ExpandBuffer[]}; IN ['a..'z], IN ['0..'9] => { uId ← FALSE; IF (i ← i+1) >= iMax THEN ExpandBuffer[]}; ENDCASE => EXIT; ENDLOOP; i ← i+1; IF uId THEN { h ← ((first*128-first) + last) MOD LAST[HashIndex] + 1; WHILE (j ← hashTab[h].symbol) # 0 DO IF vocabIndex[j]-(s2←vocabIndex[j-1]) = i THEN FOR s1 IN [0 .. i) DO IF buffer[s1] # vocab[s2] THEN EXIT; s2 ← s2+1; REPEAT FINISHED => {class ← j; GO TO GotNext}; ENDLOOP; IF (h ← hashTab[h].link) = 0 THEN EXIT; ENDLOOP}; desc.length ← i; class ← tokenID; value ← SymTabOps.EnterString[@desc]; GO TO GotNext}; ',, ';, ':, '=, '., '[, '], '{, '} => { class ← scanTab[char]; GO TO GetNext}; '- => { pChar: CHARACTER; NextChar[]; IF char # '- THEN {class ← scanTab['-]; GO TO GotNext}; char ← NUL; DO pChar ← char; IF (tI←tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]}; char ← tB[tI]; SELECT char FROM '- => IF pChar = '- THEN EXIT; CR => EXIT; ENDCASE; ENDLOOP; NextChar[]}; ENDCASE => { class ← scanTab[char]; IF class # 0 THEN GO TO GetNext; NextChar[]; ScanError[char, index]}; REPEAT GetNext => {IF (tI←tI+1) = tMax THEN FillBuffer[]; char ← tB[tI]}; GotNext => NULL; EndFile => { FillBuffer[]; char ← tB[tI]; class ← TrueEndOfFile; index ← tOrigin; value ← 0}; ENDLOOP; nTokens ← nTokens + 1; RETURN}; -- initialization/finalization ScanInit: PUBLIC PROC [ sourceStream, messageStream: Streams.Handle, table: ParseTable.TableRef] = { hashTab ← @table[table.scanTable.hashTab]; scanTab ← @table[table.scanTable.scanTab]; vocab ← LOOPHOLE[@table[table.scanTable.vocabBody], Strings.String]; vocabIndex ← @table[table.scanTable.vocabIndex]; IF buffer = NIL THEN buffer ← LongStorage.String[256]; iMax ← buffer.length ← buffer.maxlength; desc.base ← buffer; desc.offset ← 0; stream ← sourceStream; errorStream ← messageStream; streamOrigin ← Streams.GetIndex[stream]; tB ← LongStorage.Pages[TextPages]; tOrigin ← tLimit ← 0; tMax ← 0; tEnded ← FALSE; FillBuffer[]; char ← tB[tI]; nTokens ← nErrors ← 0}; ScanReset: PUBLIC PROC RETURNS [CARDINAL, CARDINAL] = { LongStorage.FreePages[tB]; IF buffer # NIL THEN {LongStorage.FreeString[buffer]; buffer ← NIL}; [] ← LongStorage.Prune[]; RETURN [nTokens, nErrors]}; -- error handling StreamIndex: TYPE = LONG CARDINAL; NewLine: PROC = INLINE {CharIO.PutChar[errorStream, CR]}; PrintTextLine: PROC [origin: LONG CARDINAL] RETURNS [start: LONG CARDINAL] = { OPEN CharIO; lineIndex: LONG CARDINAL; char: CHARACTER; n: [1..100]; start ← lineIndex ← origin; FOR n IN [1..100] UNTIL lineIndex = 0 DO lineIndex ← lineIndex - 1; Streams.SetIndex[stream, lineIndex]; IF Streams.GetByte[stream] = CR THEN EXIT; start ← lineIndex; ENDLOOP; Streams.SetIndex[stream, start]; FOR n IN [1..100] UNTIL Streams.Ended[stream] DO char ← Streams.GetByte[stream ! Streams.End[] => GOTO out]; SELECT char FROM CR, ControlZ => EXIT; ENDCASE => PutChar[errorStream, char]; REPEAT out => NULL; ENDLOOP; NewLine[]; RETURN}; ResetScanIndex: PUBLIC PROC [index: CARDINAL] = { page: CARDINAL; IF ~(index IN [tOrigin .. tLimit)) THEN { page ← index/(PackEnviron.PageSize * PackEnviron.CharsPerWord); tOrigin ← tLimit ← page*(PackEnviron.PageSize * PackEnviron.CharsPerWord); tMax ← 0; tEnded ← FALSE; Streams.SetIndex[stream, streamOrigin + index]; FillBuffer[]}; tI ← index - tOrigin; IF tI >= tMax THEN FillBuffer[]; char ← tB[tI]}; ScanError: PROC [code: {number, string, char, atom}, tokenIndex: CARDINAL] = { nErrors ← nErrors + 1; ErrorContext[ SELECT code FROM number => "invalid number"L, string => "string too long"L, char => "invalid character"L, atom => "invalid atom"L, ENDCASE => NIL, tokenIndex]; NewLine[]}; ErrorContext: PUBLIC PROC [message: STRING, tokenIndex: CARDINAL] = { OPEN CharIO; saveIndex: LONG CARDINAL = Streams.GetIndex[stream]; origin: LONG CARDINAL = streamOrigin+tokenIndex; char: CHARACTER; Streams.SetIndex[stream, PrintTextLine[origin]]; UNTIL Streams.GetIndex[stream] = origin DO char ← Streams.GetByte[stream ! Streams.End[] => GOTO out]; PutChar[errorStream, IF char = TAB THEN TAB ELSE ' ]; REPEAT out => NULL; ENDLOOP; PutString[errorStream, "↑ "L]; PutString[errorStream, message]; PutString[errorStream, " ["L]; PutNumber[errorStream, tokenIndex, [base:10, zerofill:FALSE, unsigned:TRUE, columns:0]]; PutChar[errorStream, ']]; NewLine[]; Streams.SetIndex[stream, saveIndex]}; END.