-- file Scanner.Mesa -- last modified by Satterthwaite, January 8, 1980 10:33 AM DIRECTORY AltoDefs: FROM "altodefs" USING [CharsPerWord, maxword, PageSize], CharIO: FROM "chario" USING [CR, TAB, PutChar, PutNumber, PutString], LiteralOps: FROM "literalops" USING [FindDescriptor, Find, FindString], P1: FROM "p1" USING [Token], ParseTable: FROM "parsetable" USING [ Handle, HashIndex, TSymbol, VocabHashEntry, EndMarker, tokenARROW, tokenCHAR, tokenDOT, tokenDOTS, tokenEQUAL, tokenGE, tokenGREATER, tokenID, tokenLE, tokenLESS, tokenLNUM, tokenLSTR, tokenMINUS, tokenNUM, tokenSTR], StreamDefs: FROM "streamdefs" USING [ StreamHandle, StreamIndex, GetIndex, ModifyIndex, NormalizeIndex, ReadBlock, SetIndex, StreamError], StringDefs: FROM "stringdefs" USING [SubStringDescriptor, AppendString], SymbolOps: FROM "symbolops" USING [EnterString], SystemDefs: FROM "systemdefs" USING [ AllocateHeapString, AllocatePages, FreeHeapString, FreePages, PruneHeap]; Scanner: PROGRAM IMPORTS CharIO, LiteralOps, StreamDefs, StringDefs, SymbolOps, SystemDefs EXPORTS P1 = BEGIN OPEN ParseTable; hashTab: POINTER TO ARRAY HashIndex OF VocabHashEntry; scanTab: POINTER TO ARRAY CHARACTER [40C..177C] OF TSymbol; vocab: STRING; vocabIndex: POINTER TO ARRAY TSymbol OF CARDINAL; NUL: CHARACTER = 0C; CR: CHARACTER = CharIO.CR; ControlZ: CHARACTER = 32C; -- Bravo escape char stream: StreamDefs.StreamHandle; -- the input stream streamOrigin: StreamDefs.StreamIndex; errorStream: StreamDefs.StreamHandle; -- the error stream TextPages: CARDINAL = 6; TextWords: CARDINAL = TextPages*AltoDefs.PageSize; TextChars: CARDINAL = TextWords*AltoDefs.CharsPerWord; tB: POINTER TO PACKED ARRAY [0..TextChars) OF CHARACTER; tI, tMax: [0..TextChars]; tOrigin, tLimit: CARDINAL; tEnded: BOOLEAN; FillTextBuffer: PROCEDURE = BEGIN words: [0..TextWords]; bytes: [0..AltoDefs.CharsPerWord); tOrigin ← tLimit; IF tEnded THEN tMax ← 0 ELSE BEGIN words ← StreamDefs.ReadBlock[stream, tB, TextWords]; bytes ← StreamDefs.GetIndex[stream].byte MOD AltoDefs.CharsPerWord; IF bytes # 0 THEN words ← words-1; tMax ← words*AltoDefs.CharsPerWord + bytes; IF tMax < TextChars THEN tEnded ← TRUE; tLimit ← tOrigin + tMax; END; IF tMax = 0 THEN BEGIN tB[0] ← NUL; tMax ← 1 END; tI ← 0; END; buffer: STRING ← NIL; -- token assembly area iMax: CARDINAL; -- iMax = buffer.maxlength desc: StringDefs.SubStringDescriptor; -- initial buffer segment nTokens: CARDINAL; -- token count nErrors: CARDINAL; -- lexical errors BufferOverflow: ERROR = CODE; ExpandBuffer: PROCEDURE = BEGIN oldBuffer: STRING ← buffer; IF oldBuffer.length > 2000 THEN ERROR BufferOverflow; buffer ← SystemDefs.AllocateHeapString[2*oldBuffer.length]; StringDefs.AppendString[buffer, oldBuffer]; iMax ← buffer.length ← buffer.maxlength; SystemDefs.FreeHeapString[oldBuffer]; desc.base ← buffer; END; char: CHARACTER; -- current (most recently scanned) character NextChar: PROCEDURE = -- also expanded inline within Atom BEGIN IF (tI←tI+1) = tMax THEN FillTextBuffer[]; char ← tB[tI]; END; Atom: PUBLIC PROCEDURE RETURNS [token: P1.Token] = BEGIN OPEN token; DO WHILE char IN [NUL..' ] DO SELECT char FROM ControlZ => UNTIL char = CR DO IF (tI←tI+1) = tMax THEN BEGIN IF tEnded THEN GO TO EndFile; FillTextBuffer[] END; char ← tB[tI]; ENDLOOP; ENDCASE; IF (tI←tI+1) = tMax THEN BEGIN IF tEnded THEN GO TO EndFile; FillTextBuffer[] END; char ← tB[tI]; ENDLOOP; index ← tOrigin + tI; value ← 0; SELECT char FROM 'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm, 'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z => BEGIN i: CARDINAL; i ← 0; DO buffer[i] ← char; IF (tI←tI+1) = tMax THEN FillTextBuffer[]; char ← tB[tI]; SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => IF (i ← i+1) >= iMax THEN ExpandBuffer[]; ENDCASE => EXIT; ENDLOOP; desc.length ← i+1; class ← tokenID; value ← SymbolOps.EnterString[@desc]; GO TO GotNext END; 'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M, 'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => BEGIN first, last: CARDINAL; uId: BOOLEAN; i, j: CARDINAL; h: HashIndex; s1, s2: CARDINAL; i ← 0; uId ← TRUE; first ← last ← char-0C; DO buffer[i] ← char; IF (tI←tI+1) = tMax THEN FillTextBuffer[]; char ← tB[tI]; SELECT char FROM IN ['A..'Z] => BEGIN last ← char-0C; IF (i ← i+1) >= iMax THEN ExpandBuffer[]; END; IN ['a..'z], IN ['0..'9] => BEGIN uId ← FALSE; IF (i ← i+1) >= iMax THEN ExpandBuffer[]; END; ENDCASE => EXIT; ENDLOOP; i ← i+1; IF uId THEN BEGIN h ← ((first*128-first) + last) MOD LAST[HashIndex] + 1; WHILE (j ← hashTab[h].symbol) # 0 DO IF vocabIndex[j]-(s2←vocabIndex[j-1]) = i THEN FOR s1 IN [0 .. i) DO IF buffer[s1] # vocab[s2] THEN EXIT; s2 ← s2+1; REPEAT FINISHED => BEGIN class ← j; GO TO GotNext END; ENDLOOP; IF (h ← hashTab[h].link) = 0 THEN EXIT; ENDLOOP; END; desc.length ← i; class ← tokenID; value ← SymbolOps.EnterString[@desc]; GO TO GotNext END; '0, '1, '2, '3, '4, '5, '6, '7, '8, '9 => BEGIN v, v10, v8: LONG INTEGER; scale: CARDINAL; valid, valid10, valid8, octal: BOOLEAN; MaxLiteral: CARDINAL = AltoDefs.maxword; vRep: ARRAY [0..SIZE[LONG INTEGER]) OF WORD; -- machine dependent v10 ← v8 ← 0; valid10 ← valid8 ← TRUE; WHILE char IN ['0..'9] DO IF valid10 THEN [v10, valid10] ← AppendDigit10[v10, char]; IF valid8 THEN [v8, valid8] ← AppendDigit8[v8, char]; NextChar[]; ENDLOOP; SELECT char FROM 'B, 'C => BEGIN class ← IF char = 'C THEN tokenCHAR ELSE tokenNUM; v ← v8; valid ← valid8; octal ← TRUE; END; ENDCASE => BEGIN class ← tokenNUM; v ← v10; valid ← valid10; octal ← FALSE; END; SELECT char FROM 'B, 'C, 'D => BEGIN NextChar[]; IF class = tokenNUM THEN BEGIN scale ← 0; WHILE char IN ['0..'9] DO scale ← 10*scale + CARDINAL[char-'0]; NextChar[]; ENDLOOP; THROUGH [1 .. scale] WHILE valid DO IF octal THEN [v, valid] ← AppendDigit8[v, '0] ELSE [v, valid] ← AppendDigit10[v, '0]; ENDLOOP; END; END; ENDCASE; vRep ← LOOPHOLE[v]; IF vRep[1] = 0 --v <= MaxLiteral-- THEN value ← LiteralOps.Find[vRep[0]] ELSE BEGIN IF class = tokenCHAR THEN valid ← FALSE; class ← tokenLNUM; value ← LiteralOps.FindDescriptor[DESCRIPTOR[vRep]]; END; IF ~valid THEN BEGIN nErrors ← nErrors + 1; ScanError[number, index]; END; GO TO GotNext END; ',, ';, ':, '←, '#, '~, '+, '*, '/, '↑, '@, '!, '(, '), '[, '], '{, '} => BEGIN class ← scanTab[char]; GO TO GetNext END; '' => BEGIN NextChar[]; class ← tokenCHAR; value ← LiteralOps.Find[char-0C]; GO TO GetNext END; '" => BEGIN i: CARDINAL; i ← 0; DO IF (tI←tI+1) = tMax THEN BEGIN IF tEnded THEN GO TO EOFEnd; FillTextBuffer[] END; char ← tB[tI]; SELECT char FROM '" => BEGIN IF (tI←tI+1) = tMax THEN FillTextBuffer[]; char ← tB[tI]; IF char # '" THEN GO TO QuoteEnd; END; ENDCASE; IF i >= iMax THEN ExpandBuffer[ !BufferOverflow => BEGIN nErrors ← nErrors + 1; ScanError[string, tOrigin+tI]; i ← 0; CONTINUE END]; buffer[i] ← char; i ← i+1; REPEAT QuoteEnd => NULL; EOFEnd => BEGIN FillTextBuffer[]; char ← tB[tI] END; ENDLOOP; desc.length ← i; value ← LiteralOps.FindString[@desc]; IF char = 'L THEN BEGIN class ← tokenLSTR; GO TO GetNext END ELSE BEGIN class ← tokenSTR; GO TO GotNext END END; '- => BEGIN pChar: CHARACTER; NextChar[]; IF char # '- THEN BEGIN class ← tokenMINUS; GO TO GotNext END; char ← NUL; DO pChar ← char; IF (tI←tI+1) = tMax THEN BEGIN IF tEnded THEN GO TO EndFile; FillTextBuffer[] END; char ← tB[tI]; SELECT char FROM '- => IF pChar = '- THEN EXIT; CR => EXIT; ENDCASE; ENDLOOP; NextChar[]; END; '. => BEGIN NextChar[]; IF char = '. THEN BEGIN class ← tokenDOTS; GO TO GetNext END ELSE BEGIN class ← tokenDOT; GO TO GotNext END END; '= => BEGIN NextChar[]; IF char = '> THEN BEGIN class ← tokenARROW; GO TO GetNext END ELSE BEGIN class ← tokenEQUAL; GO TO GotNext END END; '< => BEGIN NextChar[]; IF char = '= THEN BEGIN class ← tokenLE; GO TO GetNext END ELSE BEGIN class ← tokenLESS; GO TO GotNext END END; '> => BEGIN NextChar[]; IF char = '= THEN BEGIN class ← tokenGE; GO TO GetNext END ELSE BEGIN class ← tokenGREATER; GO TO GotNext END END; ENDCASE => BEGIN class ← scanTab[char]; IF class # 0 THEN GO TO GetNext; NextChar[]; nErrors ← nErrors + 1; ScanError[char, index]; END; REPEAT GetNext => BEGIN IF (tI←tI+1) = tMax THEN FillTextBuffer[]; char ← tB[tI]; END; GotNext => NULL; EndFile => BEGIN FillTextBuffer[]; char ← tB[tI]; class ← EndMarker; index ← tOrigin; value ← 0; END; ENDLOOP; nTokens ← nTokens + 1; RETURN END; -- numerical conversion Digit: ARRAY CHARACTER ['0..'9] OF CARDINAL = [0,1,2,3,4,5,6,7,8,9]; AppendDigit10: PROCEDURE [v: LONG INTEGER, digit: CHARACTER ['0..'9]] RETURNS [newV: LONG INTEGER, valid: BOOLEAN] = BEGIN MaxV: LONG INTEGER = 429496729; -- (2**32-1)/10 MaxD: CARDINAL = 5; -- (2**32-1) MOD 10 d: [0..9] = Digit[digit]; valid ← v < MaxV OR (v = MaxV AND d <= MaxD); newV ← 10*v + d; RETURN END; AppendDigit8: PROCEDURE [v: LONG INTEGER, digit: CHARACTER ['0..'9]] RETURNS [newV: LONG INTEGER, valid: BOOLEAN] = BEGIN MaxV: LONG INTEGER = 3777777777B; -- (2**32-1)/8 MaxD: CARDINAL = 7B; -- (2**32-1) MOD 8 d: [0..9] = Digit[digit]; valid ← (d < 8) AND (v < MaxV OR (v = MaxV AND d <= MaxD)); newV ← 8*v + d; RETURN END; -- initialization/finalization ScanInit: PUBLIC PROCEDURE [ sourceStream, messageStream: StreamDefs.StreamHandle, table: ParseTable.Handle] = BEGIN hashTab ← @table.scanTable.hashTab; scanTab ← @table.scanTable.scanTab; vocab ← LOOPHOLE[@table.scanTable.vocabBody, STRING]; vocabIndex ← @table.scanTable.vocabIndex; IF buffer = NIL THEN buffer ← SystemDefs.AllocateHeapString[256]; iMax ← buffer.length ← buffer.maxlength; desc.base ← buffer; desc.offset ← 0; stream ← sourceStream; errorStream ← messageStream; streamOrigin ← StreamDefs.GetIndex[stream]; tB ← SystemDefs.AllocatePages[TextPages]; tOrigin ← tLimit ← 0; tMax ← 0; tEnded ← FALSE; FillTextBuffer[]; char ← tB[tI]; nTokens ← nErrors ← 0; END; ScanReset: PUBLIC PROCEDURE RETURNS [CARDINAL, CARDINAL] = BEGIN SystemDefs.FreePages[tB]; IF buffer # NIL THEN BEGIN SystemDefs.FreeHeapString[buffer]; buffer ← NIL END; [] ← SystemDefs.PruneHeap[]; RETURN [nTokens, nErrors] END; -- error handling StreamIndex: TYPE = StreamDefs.StreamIndex; NewLine: PROCEDURE = BEGIN CharIO.PutChar[errorStream, CR] END; PrintTextLine: PROCEDURE [origin: StreamIndex] RETURNS [start: StreamIndex] = BEGIN OPEN CharIO; lineIndex: StreamIndex; char: CHARACTER; n: [1..100]; start ← lineIndex ← origin; FOR n IN [1..100] UNTIL lineIndex = [0, 0] DO lineIndex ← StreamDefs.ModifyIndex[lineIndex, -1]; StreamDefs.SetIndex[stream, lineIndex]; IF stream.get[stream] = CR THEN EXIT; start ← lineIndex; ENDLOOP; StreamDefs.SetIndex[stream, start]; FOR n IN [1..100] UNTIL stream.endof[stream] DO char ← stream.get[stream]; SELECT char FROM CR, ControlZ => EXIT; ENDCASE => PutChar[errorStream, char]; ENDLOOP; NewLine[]; RETURN END; ResetScanIndex: PUBLIC PROCEDURE [index: CARDINAL] = BEGIN page: CARDINAL; IF index ~IN [tOrigin .. tLimit) THEN BEGIN page ← index/(AltoDefs.PageSize*AltoDefs.CharsPerWord); tOrigin ← tLimit ← page*(AltoDefs.PageSize*AltoDefs.CharsPerWord); tMax ← 0; tEnded ← FALSE; StreamDefs.SetIndex[stream, [page: streamOrigin.page+page, byte: streamOrigin.byte]]; FillTextBuffer[]; END; tI ← index - tOrigin; IF tI >= tMax THEN FillTextBuffer[]; char ← tB[tI]; END; ScanError: PROCEDURE [code: {number, string, char}, tokenIndex: CARDINAL] = BEGIN ErrorContext[ SELECT code FROM number => "Invalid Number"L, string => "String Too Long"L, char => "Invalid Character"L, ENDCASE => NIL, tokenIndex]; NewLine[]; END; ErrorContext: PUBLIC PROCEDURE [message: STRING, tokenIndex: CARDINAL] = BEGIN OPEN CharIO; saveIndex: StreamIndex = StreamDefs.GetIndex[stream]; origin: StreamIndex = StreamDefs.NormalizeIndex[ [page: streamOrigin.page, byte: streamOrigin.byte+tokenIndex]]; char: CHARACTER; StreamDefs.SetIndex[stream, PrintTextLine[origin]]; UNTIL StreamDefs.GetIndex[stream] = origin DO char ← stream.get[stream ! StreamDefs.StreamError => EXIT]; PutChar[errorStream, IF char = TAB THEN TAB ELSE ' ]; ENDLOOP; PutString[errorStream, "↑ "L]; PutString[errorStream, message]; PutString[errorStream, " ["L]; PutNumber[errorStream, tokenIndex, [base:10, zerofill:FALSE, unsigned:TRUE, columns:0]]; PutChar[errorStream, ']]; NewLine[]; StreamDefs.SetIndex[stream, saveIndex]; END; END.