-- file BcdScan.mesa
-- last modified by Satterthwaite, October 28, 1982 11:36 am
-- derived from Compiler>Scanner.mesa

DIRECTORY
  Ascii: TYPE USING [BS, ControlZ, CR, FF, LF, NUL, TAB],
  CharIO: TYPE USING [PutChar, PutNumber, PutString],
  CompilerUtil: TYPE USING [
    AcquireStream, AcquireZone, ReleaseStream, ReleaseZone],
  Environment: TYPE USING [charsPerWord, charsPerPage, wordsPerPage],
  FileStream: TYPE USING [FileByteIndex, EndOf, GetIndex, SetIndex],
  P1: TYPE USING [Token, Value, NullValue],
  ParseTable: TYPE USING [
    HashIndex, HashTableRef, IndexTableRef, ScanTableRef, TableRef,
    VocabularyRef, EndMarker, tokenID, tokenSTR],
  Stream: TYPE USING [Handle, GetBlock, GetChar],
  Strings: TYPE USING [String, SubStringDescriptor, AppendString],
  SymbolOps: TYPE USING [EnterString];

Scanner: PROGRAM
    IMPORTS CharIO, CompilerUtil, FileStream, Stream, Strings, SymbolOps
    EXPORTS P1 = {
  OPEN ParseTable;

  -- storage zone and scanning tables (all assigned in ScanInit)

  zone: UNCOUNTED ZONE _ NIL;

  hashTab: HashTableRef;
  scanTab: ScanTableRef;
  vocab: VocabularyRef;
  vocabIndex: IndexTableRef;

  -- input text buffering

  stream: Stream.Handle _ NIL;			-- the input stream
  streamOrigin: FileStream.FileByteIndex;	-- stream index recorded by ScanInit

  textPages: NAT ~ 6;
  textWords: NAT ~ textPages*Environment.wordsPerPage;
  textChars: NAT ~ textWords*Environment.charsPerWord;

  TextBuffer: TYPE ~ PACKED ARRAY [0..textChars) OF CHAR;

  tB: LONG POINTER TO TextBuffer;	-- current block of source text
  tI, tMax: [0..textChars];		-- scan position in tB; count of valid chars in tB
  tOrigin, tLimit: CARDINAL;		-- source offsets of tB[0] and of the end of the block
  tEnded: BOOL;				-- TRUE once a short block has been read

  -- Loads the next block of source text into tB and resets tI to 0.
  -- Once the stream has ended (or when nothing was transferred), the buffer
  -- is given a single NUL so that tB[tI] is always a defined character.
  FillBuffer: PROC ~ {
    tOrigin _ tLimit;
    IF tEnded THEN tMax _ 0
    ELSE {
      tMax _ stream.GetBlock[[LOOPHOLE[tB], 0, textChars]].bytesTransferred;
      IF tMax < textChars THEN tEnded _ TRUE;	-- short read is taken as end of input
      tLimit _ tOrigin + tMax};
    IF tMax = 0 THEN {tB[0] _ Ascii.NUL; tMax _ 1};
    tI _ 0};

  -- token assembly

  buffer: Strings.String _ NIL;		-- token assembly area
  iMax: CARDINAL;			-- iMax = buffer.maxlength
  desc: Strings.SubStringDescriptor;	-- initial buffer segment

  nTokens: NAT;				-- token count
  nErrors: NAT;				-- lexical errors

  BufferOverflow: ERROR ~ CODE;

  -- Replaces the token assembly buffer by one of twice the current length,
  -- copying the old contents.  Raises BufferOverflow, without reallocating,
  -- when the current length already exceeds 2000 characters.
  ExpandBuffer: PROC ~ {
    oldBuffer: Strings.String _ buffer;
    IF oldBuffer.length > 2000 THEN ERROR BufferOverflow;
    buffer _ zone.NEW[StringBody[2*oldBuffer.length]];
    Strings.AppendString[buffer, oldBuffer];
    iMax _ buffer.length _ buffer.maxlength;
    zone.FREE[@oldBuffer];
    desc.base _ buffer};		-- keep the substring descriptor on the live buffer

  char: CHAR;	-- current (most recently scanned) character
  qDot: BOOL;	-- used to resolve decimal point vs. interval
		-- (only initialized in this file; no other reference is visible here)

  -- Advances to the next source character, refilling the buffer as needed.
  NextChar: PROC ~ {	-- also expanded inline within Atom
    IF (tI_tI+1) = tMax THEN FillBuffer[];
    char _ tB[tI]};

  -- Scans and returns the next token.
  --   token.index  source offset (relative to streamOrigin) of the token start
  --   token.class  terminal code: tokenID, tokenSTR, EndMarker, a vocabulary
  --                index found through hashTab, or a scanTab entry
  --   token.value  P1.NullValue, or (in value.r) the result of
  --                SymbolOps.EnterString for identifiers and strings
  -- Lexical errors are reported through ScanError.  On exit, char holds the
  -- first character following the token.
  Atom: PUBLIC PROC RETURNS [token: P1.Token] ~ {
    OPEN token;
    DO
      -- skip blanks and control characters
      WHILE char IN [Ascii.NUL..' ] DO
        SELECT char FROM
          Ascii.NUL => {	-- ^@^@ is Tioga escape seq
            IF (tI_tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]};
            char _ tB[tI];
            IF char = Ascii.NUL THEN GO TO EndFile};
          Ascii.ControlZ =>	-- ^Z is Bravo escape char
            -- discard the rest of the line
            UNTIL char = Ascii.CR DO
              IF (tI_tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]};
              char _ tB[tI];
              ENDLOOP;
          ENDCASE => {
            IF (tI_tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]};
            char _ tB[tI]};
        ENDLOOP;

      index _ tOrigin + tI;  value _ P1.NullValue;

      SELECT char FROM

        -- identifier beginning with a lower case letter
        'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm,
        'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z => {
          i: CARDINAL _ 0;
          -- NOTE(review): BufferOverflow from ExpandBuffer is not caught here,
          -- unlike in the string case below.
          DO
            buffer[i] _ char;
            IF (tI_tI+1) = tMax THEN FillBuffer[];
            char _ tB[tI];
            SELECT char FROM
              IN ['a..'z], IN ['A..'Z], IN ['0..'9] =>
                IF (i _ i+1) >= iMax THEN ExpandBuffer[];
              ENDCASE => EXIT;
            ENDLOOP;
          desc.length _ i+1;
          class _ tokenID;  value.r _ SymbolOps.EnterString[@desc];
          GO TO GotNext};

        -- identifier beginning with an upper case letter; one made up of
        -- upper case letters only is first looked up in the vocabulary
        'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M,
        'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => {
          i: CARDINAL _ 0;
          uId: BOOL _ TRUE;			-- TRUE while only ['A..'Z] has been seen
          first, last: NAT _ char.ORD;		-- codes of first and latest upper case letter
          DO
            buffer[i] _ char;
            IF (tI_tI+1) = tMax THEN FillBuffer[];
            char _ tB[tI];
            SELECT char FROM
              IN ['A..'Z] => {
                last _ char.ORD;
                IF (i _ i+1) >= iMax THEN ExpandBuffer[]};
              IN ['a..'z], IN ['0..'9] => {
                uId _ FALSE;
                IF (i _ i+1) >= iMax THEN ExpandBuffer[]};
              ENDCASE => EXIT;
            ENDLOOP;
          i _ i+1;				-- i is now the identifier length
          IF uId THEN {
            -- hash on first and last characters, then follow the chain
            h: HashIndex _ ((first*128-first) + last) MOD HashIndex.LAST + 1;
            j, s1, s2: CARDINAL;
            WHILE (j _ hashTab[h].symbol) # 0 DO
              -- compare text only when the lengths agree
              IF vocabIndex[j]-(s2_vocabIndex[j-1]) = i THEN
                FOR s1 IN [0 .. i) DO
                  IF buffer[s1] # vocab.text[s2] THEN EXIT;
                  s2 _ s2+1;
                  REPEAT
                    FINISHED => {class _ j; GO TO GotNext};
                  ENDLOOP;
              IF (h _ hashTab[h].link) = 0 THEN EXIT;
              ENDLOOP};
          desc.length _ i;
          class _ tokenID;  value.r _ SymbolOps.EnterString[@desc];
          GO TO GotNext};

        -- single character tokens, classified by scanTab
        ',, ';, ':, '_, '#, '~, '+, '*, '/, '^, '@, '!,
        '(, '), '[, '], '{, '} => {
          class _ scanTab[char];  GO TO GetNext};

        -- string literal; a doubled quote inside the literal stands for one quote
        '" => {
          i: CARDINAL _ 0;
          valid: BOOL;
          advance: BOOL _ TRUE;		-- FALSE when Escape left char unconsumed
          DO
            IF advance THEN {
              IF (tI_tI+1) = tMax THEN {IF tEnded THEN GO TO EOFEnd; FillBuffer[]};
              char _ tB[tI]};
            SELECT char FROM
              '" => {
                IF (tI_tI+1) = tMax THEN FillBuffer[];
                char _ tB[tI];
                IF char # '" THEN GO TO QuoteEnd};
              ENDCASE;
            IF i >= iMax THEN
              -- on overflow, report once and restart assembly at the buffer origin
              ExpandBuffer[ ! BufferOverflow => {ScanError[$string, index]; i _ 0; CONTINUE}];
            [buffer[i], valid, advance] _ Escape[];  i _ i+1;
            IF ~valid THEN ScanError[$escape, tOrigin + tI];
            REPEAT
              QuoteEnd => NULL;
              EOFEnd => {ScanError[$string, index]; FillBuffer[]; char _ tB[tI]};
            ENDLOOP;
          desc.length _ i;
          value.r _ SymbolOps.EnterString[@desc];
          class _ tokenSTR;  GO TO GotNext};

        -- single hyphen (classified by scanTab) or the start of a comment
        '- => {
          NextChar[];
          IF char # '- THEN {
            class _ scanTab['-];
            -- index is the position of the offending hyphen itself, as in the
            -- ENDCASE arm below (index-1 pointed one character too far left
            -- and underflowed for a hyphen at offset 0).
            -- NOTE(review): a class of 0 is still returned as a token after
            -- the error is reported, whereas the ENDCASE arm skips the character.
            IF class = 0 THEN ScanError[$char, index];
            GO TO GotNext};
          -- comment: skip to the closing doubled hyphen or to end of line
          char _ Ascii.NUL;
          DO
            pChar: CHAR ~ char;
            IF (tI_tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]};
            char _ tB[tI];
            SELECT char FROM
              '- => IF pChar = '- THEN EXIT;
              Ascii.CR => EXIT;
              ENDCASE;
            ENDLOOP;
          NextChar[]};

        -- anything else: accept if scanTab classifies it, otherwise report and skip
        ENDCASE => {
          class _ scanTab[char];
          IF class # 0 THEN GO TO GetNext;
          NextChar[];
          ScanError[$char, index]};

      REPEAT
        GetNext => {IF (tI_tI+1) = tMax THEN FillBuffer[]; char _ tB[tI]};
        GotNext => NULL;
        EndFile => {
          class _ EndMarker;
          index _ tOrigin + (tI-1);  value _ P1.NullValue;
          UNTIL tEnded DO FillBuffer[] ENDLOOP;	-- flush stream
          FillBuffer[];  char _ tB[tI]};
      ENDLOOP;
    nTokens _ nTokens + 1;
    RETURN};


 -- numerical conversion

  -- value of each decimal digit character
  Digit: ARRAY CHAR ['0..'9] OF [0..9] ~ [0,1,2,3,4,5,6,7,8,9];


 -- character and string constants

  escapeMark: CHAR ~ '\\;

  -- Interprets the current character as one element of a string body.
  -- An ordinary character is returned unchanged.  After escapeMark, a letter
  -- code, a quote, an escapeMark, or up to three octal digits is translated.
  --   c        the resulting character
  --   valid    FALSE if the escape sequence was malformed or exceeded 377b
  --   advance  FALSE if char has not been consumed, so the caller must
  --            examine it again before advancing
  Escape: PROC RETURNS [c: CHAR, valid, advance: BOOL_TRUE] ~ {
    c _ char;
    IF c = escapeMark THEN {
      NextChar[];
      SELECT char FROM
        'n, 'N => c _ Ascii.CR;
        'r, 'R => c _ Ascii.CR;
        'l, 'L => c _ Ascii.LF;
        't, 'T => c _ Ascii.TAB;
        'b, 'B => c _ Ascii.BS;
        'f, 'F => c _ Ascii.FF;
        '', '", escapeMark => c _ char;
        IN ['0 .. '7] => {
          nc, v: CARDINAL _ 0;		-- digits consumed; accumulated value
          DO
            IF ~(char IN ['0..'7]) THEN {valid _ advance _ FALSE; EXIT};
            v _ 8*v + Digit[char];
            IF (nc _ nc+1) = 3 THEN EXIT;
            NextChar[];
            ENDLOOP;
          IF v > 377b THEN {valid _ FALSE; v _ 0};
          c _ v + 0c};
        ENDCASE => valid _ advance _ FALSE};
    RETURN};


 -- initialization/finalization

  -- Prepares the scanner: acquires the zone and the source stream, binds the
  -- scanning tables found in table, allocates the token and text buffers,
  -- reads the first block of text and clears the token and error counts.
  ScanInit: PUBLIC PROC [table: ParseTable.TableRef] ~ {
    zone _ CompilerUtil.AcquireZone[];
    stream _ CompilerUtil.AcquireStream[source];
    hashTab _ @table[table.scanTable.hashTab];
    scanTab _ @table[table.scanTable.scanTab];
    vocab _ LOOPHOLE[@table[table.scanTable.vocabBody]];
    vocabIndex _ @table[table.scanTable.vocabIndex];
    IF buffer = NIL THEN buffer _ zone.NEW[StringBody[256]];
    iMax _ buffer.length _ buffer.maxlength;
    desc.base _ buffer;  desc.offset _ 0;
    streamOrigin _ FileStream.GetIndex[stream];
    tB _ zone.NEW[TextBuffer];
    tOrigin _ tLimit _ 0;  tMax _ 0;  tEnded _ FALSE;
    FillBuffer[];  char _ tB[tI];  qDot _ FALSE;
    nTokens _ nErrors _ 0};

  -- Releases the source stream, both buffers and the zone.
  -- Returns [number of tokens scanned, number of lexical errors reported].
  ScanReset: PUBLIC PROC RETURNS [NAT, NAT] ~ {
    CompilerUtil.ReleaseStream[source];
    zone.FREE[@tB];
    IF buffer # NIL THEN zone.FREE[@buffer];
    CompilerUtil.ReleaseZone[zone];  zone _ NIL;
    RETURN [nTokens, nErrors]};


 -- error handling

  StreamIndex: TYPE ~ FileStream.FileByteIndex;

  -- Repositions the scanner so that char is the source character at offset
  -- index (relative to streamOrigin).  If index lies outside the block now
  -- buffered, the stream is repositioned to the start of the containing page
  -- and the buffer is refilled from there.  Always returns TRUE.
  ResetScanIndex: PUBLIC PROC [index: CARDINAL] RETURNS [success: BOOL] ~ {
    IF ~(index IN [tOrigin .. tLimit)) THEN {
      page: CARDINAL ~ index/Environment.charsPerPage;
      tOrigin _ tLimit _ page*Environment.charsPerPage;
      tMax _ 0;  tEnded _ FALSE;
      FileStream.SetIndex[stream, streamOrigin + tOrigin];
      FillBuffer[]};
    tI _ index - tOrigin;
    IF tI >= tMax THEN FillBuffer[];
    char _ tB[tI];
    RETURN [TRUE]};

  ErrorCode: TYPE ~ {number, string, char, atom, escape};

  -- Counts a lexical error and writes its message, with source context for
  -- the offset tokenIndex, on the log stream.
  ScanError: PROC [code: ErrorCode, tokenIndex: CARDINAL] ~ {
    errorStream: Stream.Handle _ CompilerUtil.AcquireStream[log];
    nErrors _ nErrors + 1;
    ErrorContext[errorStream,
      SELECT code FROM
        $number => "invalid number"L,
        $string => "string unterminated or too long"L,
        $char => "invalid character"L,
        $atom => "invalid atom"L,
        $escape => "invalid escape sequence"L,
        ENDCASE => NIL,
      tokenIndex];
    CharIO.PutChar[errorStream, '\n];
    CompilerUtil.ReleaseStream[log]};

  -- Writes to the stream "to" the source line containing offset tokenIndex
  -- (at most 100 characters to either side of the search start), followed by
  -- a line carrying a caret under that position, the message and the offset
  -- in brackets.  The source stream position is saved and restored.
  ErrorContext: PUBLIC PROC [
      to: Stream.Handle, message: Strings.String, tokenIndex: CARDINAL] ~ {
    OPEN CharIO;
    saveIndex: StreamIndex ~ FileStream.GetIndex[stream];
    origin: StreamIndex ~ streamOrigin + tokenIndex;
    start, lineIndex: StreamIndex _ origin;
    char: CHAR;		-- local; hides the scanner's current character
    n: [1..100];
    -- search backward for the start of the line
    FOR n IN [1..100] UNTIL lineIndex = 0 DO
      lineIndex _ lineIndex - 1;
      FileStream.SetIndex[stream, lineIndex];
      IF stream.GetChar[] = Ascii.CR THEN EXIT;
      start _ lineIndex;
      ENDLOOP;
    -- copy the line text
    FileStream.SetIndex[stream, start];
    FOR n IN [1..100] UNTIL FileStream.EndOf[stream] DO
      char _ stream.GetChar[];
      SELECT char FROM
        Ascii.CR, Ascii.ControlZ => EXIT;
        ENDCASE => PutChar[to, char];
      ENDLOOP;
    CharIO.PutChar[to, Ascii.CR];
    -- pad up to the error column, repeating tabs so that the caret lines up
    FileStream.SetIndex[stream, start];
    UNTIL FileStream.GetIndex[stream] = origin OR FileStream.EndOf[stream] DO
      char _ stream.GetChar[];
      PutChar[to, IF char = Ascii.TAB THEN '\t ELSE ' ];
      ENDLOOP;
    PutString[to, "^ "L];  PutString[to, message];  PutString[to, " ["L];
    PutNumber[to, tokenIndex,
      [base~10, zerofill~FALSE, unsigned~TRUE, columns~0]];
    PutChar[to, ']];  CharIO.PutChar[to, '\n];
    FileStream.SetIndex[stream, saveIndex]};

  }.