-- Scanner: lexical scanner exporting P1.
-- Reads characters from an IO.STREAM through a fixed size text buffer and
-- assembles tokens (identifiers, reserved words, string literals and
-- punctuation) for the parser, using the scan tables installed by
-- InstallScanTable.  Lexical errors are reported through a client supplied
-- logger procedure.
-- NOTE: every end-of-line comment must stay on its own physical line; if the
-- file is collapsed onto one line the comments swallow the code after them.

DIRECTORY
  Ascii: TYPE USING [BS, ControlZ, CR, FF, LF, NUL, TAB],
  Basics: TYPE USING [charsPerWord],
  ConvertUnsafe: TYPE USING [SubString],
  IO: TYPE USING [
    card, EndOf, GetIndex, GetChar, PutChar, PutF, rope, SetIndex, STREAM,
    UnsafeGetBlock],
  HashOps: TYPE USING [EnterString],
  P1: TYPE USING [Token, Value, nullValue],
  ParseTable: TYPE USING [
    HashIndex, HashTableRef, IndexTableRef, ScanTableRef, TableRef, VocabularyRef,
    endMarker, tokenID, tokenSTR],
  PrincOps: TYPE USING [wordsPerPage],
  RefText: TYPE USING [Append],
  Rope: TYPE USING [ROPE];

Scanner: PROGRAM
    IMPORTS HashOps, IO, RefText
    EXPORTS P1 = {
  OPEN ParseTable;

 -- table installation

  tablePtr: ParseTable.TableRef;
  hashTab: HashTableRef;
  scanTab: ScanTableRef;
  vocab: VocabularyRef;
  vocabIndex: IndexTableRef;

  -- Records the parse table base and derives pointers to the hash table,
  -- scan table, vocabulary body and vocabulary index stored inside it.
  InstallScanTable: PUBLIC PROC [base: ParseTable.TableRef] = {
    tablePtr _ base;
    hashTab _ @tablePtr[tablePtr.scanTable.hashTab];
    scanTab _ @tablePtr[tablePtr.scanTable.scanTab];
    vocab _ LOOPHOLE[@tablePtr[tablePtr.scanTable.vocabBody]];
    vocabIndex _ @tablePtr[tablePtr.scanTable.vocabIndex]};

 -- scanner state

  stream: IO.STREAM _ NIL;  -- the input stream
  streamOrigin: StreamIndex;  -- stream position at ScanInit time

  Logger: PROC [PROC [log: IO.STREAM]] _ NIL;  -- client supplied error logger

  textPages: NAT ~ 6;
  textWords: NAT ~ textPages*PrincOps.wordsPerPage;
  textChars: NAT ~ textWords*Basics.charsPerWord;

  TextBuffer: TYPE ~ PACKED ARRAY [0..textChars) OF CHAR;

  tB: REF TextBuffer;  -- the input text buffer
  tI, tMax: [0..textChars];  -- current position and count of valid chars in tB
  tOrigin, tLimit: CARDINAL;  -- source indices (relative to streamOrigin) of the buffer window
  tEnded: BOOL;  -- TRUE once a short read has been seen

  -- Reads the next block of the stream into tB and resets tI to 0.
  -- A read shorter than textChars marks the stream as ended.  When nothing
  -- is available a single NUL is planted so that the scanner always has a
  -- character to look at (Atom treats NUL at end of input as end of file).
  FillBuffer: PROC ~ {
    tOrigin _ tLimit;
    IF tEnded THEN tMax _ 0
    ELSE {
      tMax _ stream.UnsafeGetBlock
        [[LOOPHOLE[tB, LONG POINTER TO PACKED ARRAY [0..0) OF CHAR], 0, textChars ]].nBytesRead;
      IF tMax < textChars THEN tEnded _ TRUE;
      tLimit _ tOrigin + tMax};
    IF tMax = 0 THEN {tB[0] _ Ascii.NUL; tMax _ 1};
    tI _ 0};

  buffer: REF TEXT _ NIL;  -- token assembly area
  iMax: CARDINAL;  -- iMax = buffer.maxLength
  desc: ConvertUnsafe.SubString;  -- initial buffer segment

  nTokens: NAT;  -- token count
  nErrors: NAT;  -- lexical errors

  BufferOverflow: ERROR ~ CODE;

  -- Doubles the token assembly buffer, preserving its contents.
  -- Raises BufferOverflow when the current buffer already exceeds 2000 chars.
  ExpandBuffer: PROC ~ {
    oldBuffer: REF TEXT _ buffer;
    IF oldBuffer.length > 2000 THEN ERROR BufferOverflow;
    buffer _ NEW[TEXT[2*oldBuffer.length]];
    buffer _ RefText.Append[buffer, oldBuffer];
    -- point desc at the buffer actually returned by Append
    desc.base _ LOOPHOLE[buffer, LONG STRING];
    iMax _ buffer.length _ buffer.maxLength;
    oldBuffer _ NIL};

  char: CHAR;  -- current (most recently scanned) character
  qDot: BOOL;  -- used to resolve decimal point vs. interval

  -- Advances to the next input character, refilling the buffer as needed.
  NextChar: PROC ~ {  -- also expanded inline within Atom
    IF (tI_tI+1) = tMax THEN FillBuffer[];
    char _ tB[tI]};

  -- Scans and returns the next token.
  -- token.index is the source index of the first character of the token,
  -- token.class its class from the scan tables (endMarker at end of input),
  -- and token.value.r the hashed text for identifiers and strings.
  Atom: PUBLIC PROC RETURNS [token: P1.Token] ~ {
    OPEN token;
    DO
      -- skip blanks and control characters
      WHILE char IN [Ascii.NUL..' ] DO
        SELECT char FROM
          Ascii.NUL => {  -- ^@^@ is Tioga escape seq
            IF (tI_tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]};
            char _ tB[tI];
            IF char = Ascii.NUL THEN GO TO EndFile};
          Ascii.ControlZ =>  -- ^Z is Bravo escape char
            UNTIL char = Ascii.CR DO
              IF (tI_tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]};
              char _ tB[tI];
              ENDLOOP;
          ENDCASE => {
            IF (tI_tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]};
            char _ tB[tI]};
        ENDLOOP;
      index _ tOrigin + tI;  value _ P1.nullValue;
      SELECT char FROM

        -- identifier starting with a lower case letter: never a vocabulary entry
        'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm,
        'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z => {
          i: CARDINAL _ 0;
          DO
            buffer[i] _ char;
            IF (tI_tI+1) = tMax THEN FillBuffer[];
            char _ tB[tI];
            SELECT char FROM
              IN ['a..'z], IN ['A..'Z], IN ['0..'9] =>
                IF (i _ i+1) >= iMax THEN ExpandBuffer[];
              ENDCASE => EXIT;
            ENDLOOP;
          desc.length _ i+1;
          class _ tokenID;  value.r _ HashOps.EnterString[desc];
          GO TO GotNext};

        -- identifier starting with an upper case letter; if it is all upper
        -- case it is looked up in the vocabulary hash table first
        'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M,
        'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => {
          i: CARDINAL _ 0;
          uId: BOOL _ TRUE;  -- TRUE while only upper case letters have been seen
          first, last: NAT _ char.ORD;
          DO
            buffer[i] _ char;
            IF (tI_tI+1) = tMax THEN FillBuffer[];
            char _ tB[tI];
            SELECT char FROM
              IN ['A..'Z] => {
                last _ char.ORD; IF (i _ i+1) >= iMax THEN ExpandBuffer[]};
              IN ['a..'z], IN ['0..'9] => {
                uId _ FALSE; IF (i _ i+1) >= iMax THEN ExpandBuffer[]};
              ENDCASE => EXIT;
            ENDLOOP;
          i _ i+1;
          IF uId THEN {
            -- hash on first and last characters, then walk the chain
            h: HashIndex _ ((first*128-first) + last) MOD HashIndex.LAST + 1;
            j, s1, s2: CARDINAL;
            WHILE (j _ hashTab[h].symbol) # 0 DO
              IF vocabIndex[j]-(s2_vocabIndex[j-1]) = i THEN
                FOR s1 IN [0 .. i) DO
                  IF buffer[s1] # vocab.text[s2] THEN EXIT;
                  s2 _ s2+1;
                  REPEAT
                    FINISHED => {class _ j; GO TO GotNext};
                  ENDLOOP;
              IF (h _ hashTab[h].link) = 0 THEN EXIT;
              ENDLOOP};
          desc.length _ i;
          class _ tokenID;  value.r _ HashOps.EnterString[desc];
          GO TO GotNext};

        -- single character tokens: class comes straight from the scan table
        ',, ';, ':, '_, '#, '~, '+, '*, '/, '^, '@, '!, '=, '., '(, '), '[, '], '{, '} => {
          class _ scanTab[char]; GO TO GetNext};

        -- string literal; a doubled quote stands for one quote character
        '" => {
          i: CARDINAL _ 0;
          valid: BOOL;
          advance: BOOL _ TRUE;
          DO
            IF advance THEN {
              IF (tI_tI+1) = tMax THEN {IF tEnded THEN GO TO EOFEnd; FillBuffer[]};
              char _ tB[tI]};
            SELECT char FROM
              '" => {
                IF (tI_tI+1) = tMax THEN FillBuffer[];
                char _ tB[tI];
                IF char # '" THEN GO TO QuoteEnd};
              ENDCASE;
            -- on overflow report the error once and restart at the buffer origin
            IF i >= iMax THEN
              ExpandBuffer[ ! BufferOverflow => {ScanError[$string, index]; i _ 0; CONTINUE}];
            [buffer[i], valid, advance] _ Escape[];  i _ i+1;
            IF ~valid THEN ScanError[$escape, tOrigin + tI];
            REPEAT
              QuoteEnd => NULL;
              EOFEnd => {ScanError[$string, index]; FillBuffer[]; char _ tB[tI]};
            ENDLOOP;
          desc.length _ i;
          value.r _ HashOps.EnterString[desc];
          class _ tokenSTR; GO TO GotNext};

        -- a single hyphen is a token; a double hyphen starts a comment that
        -- runs to the next double hyphen or to the end of the line
        '- => {
          NextChar[];
          IF char # '- THEN {
            class _ scanTab['-];
            -- report at the token itself (index), as every other $char error does
            IF class = 0 THEN ScanError[$char, index];
            GO TO GotNext};
          char _ Ascii.NUL;
          DO
            pChar: CHAR ~ char;
            IF (tI_tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]};
            char _ tB[tI];
            SELECT char FROM
              '- => IF pChar = '- THEN EXIT;
              Ascii.CR => EXIT;
              ENDCASE;
            ENDLOOP;
          NextChar[]};

        -- doubled left brocket opens a nestable comment closed by a doubled
        -- right brocket; a lone left brocket is an invalid character here
        '< => {
          NextChar[];
          SELECT char FROM
            '< => {
              state: {plain, leftBrocket, rightBrocket} _ $plain;
              nest: CARDINAL _ 1;
              DO
                IF (tI_tI+1) = tMax THEN {
                  IF tEnded THEN GO TO EndFile; FillBuffer[]};
                char _ tB[tI];
                SELECT char FROM
                  '> =>
                    SELECT state FROM
                      $plain, $leftBrocket => state _ $rightBrocket;
                      $rightBrocket => {
                        state _ $plain; nest _ nest - 1; IF nest = 0 THEN EXIT};
                      ENDCASE;
                  '< =>
                    SELECT state FROM
                      $plain, $rightBrocket => state _ $leftBrocket;
                      $leftBrocket => {state _ $plain; nest _ nest + 1};
                      ENDCASE;
                  ENDCASE => state _ $plain;
                ENDLOOP;
              NextChar[]};
            ENDCASE => ScanError[$char, index]};

        -- anything else: accept it if the scan table knows it, else complain
        ENDCASE => {
          class _ scanTab[char];
          IF class # 0 THEN GO TO GetNext;
          NextChar[];
          ScanError[$char, index]};
      REPEAT
        GetNext => {IF (tI_tI+1) = tMax THEN FillBuffer[]; char _ tB[tI]};
        GotNext => NULL;
        EndFile => {
          class _ endMarker;  index _ tOrigin + (tI-1);  value _ P1.nullValue;
          UNTIL tEnded DO FillBuffer[] ENDLOOP;  -- flush stream
          FillBuffer[];  char _ tB[tI]};
      ENDLOOP;
    nTokens _ nTokens + 1;
    RETURN};

 -- numerical conversion

  Digit: ARRAY CHAR ['0..'9] OF [0..9] ~ [0,1,2,3,4,5,6,7,8,9];

 -- character and string constants

  escapeMark: CHAR ~ '\\;

  -- Interprets the current character as one (possibly escaped) character of
  -- a string literal.
  -- Returns: c, the character denoted; valid, FALSE for a malformed escape;
  -- advance, FALSE when the current character has not been consumed and
  -- must be looked at again by the caller.
  Escape: PROC RETURNS [c: CHAR, valid, advance: BOOL_TRUE] ~ {
    c _ char;
    IF c = escapeMark THEN {
      NextChar[];
      SELECT char FROM
        'n, 'N => c _ Ascii.CR;
        'r, 'R => c _ Ascii.CR;
        'l, 'L => c _ Ascii.LF;
        't, 'T => c _ Ascii.TAB;
        'b, 'B => c _ Ascii.BS;
        'f, 'F => c _ Ascii.FF;
        '', '", escapeMark => c _ char;
        IN ['0 .. '7] => {
          -- exactly three octal digits are expected, value at most 377b
          nc, v: CARDINAL _ 0;
          DO
            IF ~(char IN ['0..'7]) THEN {valid _ advance _ FALSE; EXIT};
            v _ 8*v + Digit[char];
            IF (nc _ nc+1) = 3 THEN EXIT;
            NextChar[];
            ENDLOOP;
          IF v > 377b THEN {valid _ FALSE; v _ 0};
          c _ VAL[v]};
        ENDCASE => valid _ advance _ FALSE};
    RETURN};

 -- initialization/finalization

  -- Prepares the scanner to read from source, reporting errors via logger.
  -- Allocates the token and text buffers, remembers the starting stream
  -- position, primes the first character and zeroes the counters.
  ScanInit: PUBLIC PROC [
      source: IO.STREAM, logger: PROC [PROC [log: IO.STREAM]]] ~ {
    stream _ source;  Logger _ logger;
    IF buffer = NIL THEN buffer _ NEW[TEXT[256]];
    desc.base _ LOOPHOLE[buffer, LONG STRING];
    desc.offset _ 0;
    iMax _ buffer.length _ buffer.maxLength;
    streamOrigin _ IO.GetIndex[stream];
    tB _ NEW[TextBuffer];
    tOrigin _ tLimit _ 0;  tMax _ 0;  tEnded _ FALSE;
    FillBuffer[];  char _ tB[tI];  qDot _ FALSE;
    nTokens _ nErrors _ 0};

  -- Drops all references held by the scanner and returns
  -- [number of tokens scanned, number of lexical errors].
  ScanReset: PUBLIC PROC RETURNS [NAT, NAT] ~ {
    stream _ NIL;  Logger _ NIL;
    tB _ NIL;  buffer _ NIL;  desc.base _ NIL;
    RETURN [nTokens, nErrors]};

 -- error handling

  StreamIndex: TYPE ~ INT;  -- FileStream.FileByteIndex

  charsPerPage: CARDINAL = Basics.charsPerWord*PrincOps.wordsPerPage;

  -- Repositions the scanner so that the next character scanned is the one
  -- at source index `index` (relative to streamOrigin).  If the index lies
  -- outside the buffered window the stream is re-read starting at the
  -- enclosing page boundary.  As written it always returns TRUE.
  ResetScanIndex: PUBLIC PROC [index: CARDINAL] RETURNS [success: BOOL] ~ {
    IF ~(index IN [tOrigin .. tLimit)) THEN {
      page: CARDINAL ~ index/charsPerPage;
      tOrigin _ tLimit _ page*charsPerPage;
      tMax _ 0;  tEnded _ FALSE;
      IO.SetIndex[stream, streamOrigin + tOrigin];
      FillBuffer[]};
    tI _ index - tOrigin;
    IF tI >= tMax THEN FillBuffer[];
    char _ tB[tI];
    RETURN [TRUE]};

  ErrorCode: TYPE ~ {number, string, char, atom, escape};

  -- Counts a lexical error and asks the logger to print its message
  -- together with the source context around tokenIndex.
  ScanError: PROC [code: ErrorCode, tokenIndex: CARDINAL] ~ {
    Inner: PROC [log: IO.STREAM] ~ {
      ErrorContext[log,
        SELECT code FROM
          $number => "invalid number",
          $string => "string unterminated or too long",
          $char => "invalid character",
          $atom => "invalid atom",
          $escape => "invalid escape sequence",
          ENDCASE => NIL,
        tokenIndex];
      IO.PutChar[log, '\n]};
    nErrors _ nErrors + 1;
    Logger[Inner]};

  -- Writes to `to` the source line containing tokenIndex (at most 100
  -- characters on either side of the scan back and forward), then a second
  -- line with a caret under the offending position followed by the message
  -- and the index.  The position of the input stream is saved and restored.
  ErrorContext: PUBLIC PROC [
      to: IO.STREAM, message: Rope.ROPE, tokenIndex: CARDINAL] ~ {
    saveIndex: StreamIndex ~ IO.GetIndex[stream];
    origin: StreamIndex ~ streamOrigin + tokenIndex;
    start, lineIndex: StreamIndex _ origin;
    char: CHAR;
    n: [1..100];
    -- back up to the start of the line (or 100 characters, or start of stream)
    FOR n IN [1..100] UNTIL lineIndex = 0 DO
      lineIndex _ lineIndex - 1;
      IO.SetIndex[stream, lineIndex];
      IF stream.GetChar[] = Ascii.CR THEN EXIT;
      start _ lineIndex;
      ENDLOOP;
    -- echo the line
    IO.SetIndex[stream, start];
    FOR n IN [1..100] UNTIL IO.EndOf[stream] DO
      char _ stream.GetChar[];
      SELECT char FROM
        Ascii.CR, Ascii.ControlZ => EXIT;
        ENDCASE => IO.PutChar[to, char];
      ENDLOOP;
    IO.PutChar[to, Ascii.CR];
    -- pad out to the error column, keeping tabs so that columns line up
    IO.SetIndex[stream, start];
    UNTIL IO.GetIndex[stream] = origin OR IO.EndOf[stream] DO
      char _ stream.GetChar[];
      IO.PutChar[to, IF char = Ascii.TAB THEN '\t ELSE ' ];
      ENDLOOP;
    IO.PutF[to, "^ %g[%d]\n", IO.rope[message], IO.card[tokenIndex]];
    IO.SetIndex[stream, saveIndex]};

  }.
dfile ProtoScanner.mesa last modified by Satterthwaite, January 10, 1983 2:20 pm derived from Compiler>Scanner.mesa Last Edited by: Maxwell, August 11, 1983 2:22 pm Last Edited by: Paul Rovner, September 22, 1983 9:49 pm table installation scanner state numerical conversion character and string constants initialization/finalization error handling ʘJšœ™Jšœ8™8Jšœ"™"J™0J™7J™šÏk ˜ Jšœœœœ œœœœœ˜7Jšœœœ˜"Jšœœœ ˜&JšœœœAœ˜gJšœ œœ˜"Jšœœœ˜)šœ œœ˜J˜NJ˜—Jšœ œœ˜$Jšœ œœ ˜Jšœœœ˜J˜—šœ ˜Jšœ œ˜Jšœ˜Jšœ ˜J˜—Jšœ™˜J˜J˜J˜J˜J˜J˜šÏnœœœ ˜=J˜J˜0J˜0Jšœœ*˜:J˜7J˜J˜——Jšœ ™ ˜JšœœœœÏc˜-J˜J˜Jš žœœœœœœ˜+J˜Jšœ œ˜Jšœ œ#˜1Jšœ œ!˜/Jš œ œœœœœ˜7J˜Jšœœ ˜J˜Jšœœ˜Jšœœ˜ J˜J˜šž œœ˜J˜Jšœœ ˜šœ˜šœ˜šœ˜šœœœœœœœœœ˜=Jšœ˜Jšœ ˜ —Jšœ˜——Jšœœ œ˜'J˜—Jšœ œœ ˜/J˜J˜J˜—JšœœœŸ˜/JšœœŸ˜,JšœŸ˜8J˜Jšœ œŸ˜Jšœ œŸ˜"J˜Jšœœœ˜J˜šž œœ˜Jšœ œ ˜Jšœœœ˜5Jšœ œœ˜'Jšœ œ œœ˜*Jšœ+˜+J˜(Jšœ œ˜J˜J˜—JšœœŸ,˜9JšœœŸ.˜;J˜šžœœŸ#˜6Jšœœ˜6J˜J˜—šžœœœœ˜/Jšœ˜ š˜šœœœ˜ šœ˜šœœŸ˜+Jš œœœœœœ˜FJ˜Jš œœœœœ ˜(—šœŸ˜-šœœ˜Jš œœœœœœ˜FJ˜Jšœ˜——šœ˜ Jš œœœœœœ˜FJ˜——Jšœ˜—J˜,Jšœ˜˜J˜3˜8Jšœœ˜š˜J˜Jšœœ˜&J˜šœ˜šœ œ œ ˜(Jšœœ˜)—Jšœœ˜—Jšœ˜—J˜J˜6Jšœœ ˜J˜—J˜3˜8Jšœœ˜Jšœœœ˜Jšœ œœ˜š˜J˜Jšœœ˜&J˜šœ˜šœ˜Jšœ œœœ˜;—šœ œ˜Jšœœœœ˜7—Jšœœ˜—Jšœ˜—J˜šœœ˜ Jšœ*œ œ˜AJšœ œ˜šœ˜$šœ(˜.šœœ ˜Jšœœœ˜)J˜ š˜Jšœœœ ˜'—Jšœ˜——Jšœœœ˜'Jšœ˜ ——J˜J˜6Jšœœ ˜J˜—J˜/J˜˜Jšœœœ ˜&J˜—˜Jšœœ˜Jšœœ˜ Jšœ œœ˜šœ˜šœ œ˜Jš œœœœœœ˜EJ˜—šœ˜˜Jšœœ˜&J˜Jšœ œœœ ˜"—Jšœ˜—šœ œ˜Jšœ6œ˜A—J˜1Jšœœ"˜0š˜Jšœ œ˜J˜C—Jšœ˜—J˜J˜$Jšœœœ ˜!J˜—˜J˜ šœ œ˜J˜Jšœ œ˜+Jšœœ ˜—Jšœ œ˜š˜Jšœœ˜Jš œœœœœœ˜GJ˜šœ˜Jšœœ œœ˜Jšœœœ˜Jšœ˜—Jšœ˜—J˜ J˜—˜J˜ šœ˜˜J˜3Jšœœ˜š˜šœœ˜Jšœœœœ˜,—J˜šœ˜šœœ˜J˜.˜Jšœ!œ œœ˜8—Jšœ˜—šœœ˜J˜.J˜2Jšœ˜—Jšœ˜—Jšœ˜—J˜ —Jšœ˜$J˜——šœ˜ J˜Jšœ œœœ ˜ J˜ J˜J˜——š˜Jšœ œœ˜BJšœ œ˜˜ J˜DJšœœœŸ˜6J˜——Jšœ˜—J˜Jšœ˜J˜J˜—Jšœ™˜Jšœœœ œ ˜=J˜J˜—Jšœ™˜Jšœ œ˜J˜š žœœœœœœ˜=J˜ šœœ˜J˜ šœ˜Jšœœ˜Jšœœ˜Jšœœ˜Jšœœ˜Jšœœ˜Jšœœ˜J˜šœ˜Jšœœ˜š˜Jš œœ œœœ˜