-- file SakuraScanner.Mesa -- derived from Compiler>Scanner.Mesa -- last modified by Satterthwaite, January 12, 1981 12:37 PM -- last edit by Russ Atkinson, 9-Jul-81 14:23:40 -- last edited by Suzuki, 15-Nov-81 19:37:06 DIRECTORY IOStream: TYPE USING [CR, FF, TAB, Handle, PutChar], Inline: TYPE USING [LowHalf, HighHalf], PPCommentTable: TYPE USING [AddComment, AddBreakHint, GetEnding, Index, Reset, SetEnding], PPLeaves: TYPE USING [HTIndex, HTNode, LTIndex, LTNode], SakuraOps: TYPE USING [GetLog, GetSource], PPP1: TYPE USING [Token, Value, NullValue], ParseTable: TYPE USING [TableRef, HashIndex, TSymbol, VocabHashEntry, EndMarker, tokenARROW, tokenATOM, tokenCHAR, tokenDOT, tokenDOTS, tokenEQUAL, tokenGE, tokenGREATER, tokenID, tokenLE, tokenLESS, tokenFLNUM, tokenLNUM, tokenLSTR, tokenMINUS, tokenNUM, tokenSTR, tokenRARROW, tokenBAR, tokenNDET, tokenPAR, tokenDIV], SakuraCommon: TYPE USING [tty], SakuraUtil: TYPE USING [PutDecimal, PutRope], Real: TYPE USING [RealException, PairToReal], Rope: TYPE USING [Fetch, Flatten, FromChar, Map, Ref, Run, Size, Text], TTY: TYPE USING [Handle, PutChar]; SakuraScanner: PROGRAM IMPORTS IOStream, PPCommentTable, SakuraOps, SakuraUtil, Inline, Real, Rope, SakuraCommon, TTY EXPORTS PPLeaves, PPP1 = BEGIN OPEN IOStream, PPLeaves, ParseTable, P1: PPP1, SakuraUtil; Index: TYPE = PPCommentTable.Index; hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry; scanTab: LONG POINTER TO ARRAY CHARACTER [40C..177C] OF TSymbol; vocab: LONG STRING; vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL; NUL: CHARACTER = 0C; ControlZ: CHARACTER = 32C; -- Bravo escape char rf: Rope.Ref ← NIL; -- the source tPos,tLimit: Index ← 0; tty: TTY.Handle ← SakuraCommon.tty; debugging: BOOLEAN ← FALSE; TTYaction: PROC [c: CHARACTER] RETURNS [BOOLEAN] = { TTY.PutChar[tty, c]; RETURN [FALSE]}; Shorten: PROC [x: Index] RETURNS [NAT] = INLINE { IF Inline.HighHalf[x] # 0 THEN ERROR; RETURN [Inline.LowHalf[x]]}; AtEof: PROC RETURNS [BOOLEAN] = INLINE { RETURN [char = NUL AND tPos >= tLimit]}; toklen: NAT ← 0; -- current token length tokpos: Index ← 0; -- source index for start of token nTokens: CARDINAL; -- token count nErrors: CARDINAL; -- lexical errors lastToken: Index ← 0; FirstChar: PROC [] = INLINE { tokpos ← tPos; toklen ← 1}; AddChar: PROC [] = INLINE { IF toklen = 0 THEN tokpos ← tPos; toklen ← toklen + 1}; AddCharPlus: PROC [] = INLINE { IF toklen = 0 THEN tokpos ← tPos; toklen ← toklen + 1; NextChar[]}; NextChar: PROC = INLINE { char ← rf.Fetch[tPos ← tPos + 1 ! ANY => {char ← NUL; tPos ← tLimit; CONTINUE}]}; char: CHARACTER; -- current (most recently scanned) character qDot: BOOLEAN; -- used to resolved decimal point vs. interval IdFromRope: PUBLIC PROC [r: Rope.Ref, index: Index] RETURNS [HTIndex] = { RETURN [NEW[HTNode ← [index: index, name: r]]]}; IdFromBuffer: PROC [index: Index] RETURNS [HTIndex] = INLINE { RETURN [IdFromRope[BufferToText[], index]]; }; IdFirst: HTIndex ← IdFromRope["first", LAST[Index]]; IDLock: HTIndex ← IdFromRope["LOCK", LAST[Index]]; IDRest: HTIndex ← IdFromRope["rest", LAST[Index]]; IdOfFirst: PUBLIC PROC RETURNS [HTIndex] = {RETURN [IdFirst]}; IdOfLock: PUBLIC PROC RETURNS [HTIndex] = {RETURN [IDLock]}; IdOfRest: PUBLIC PROC RETURNS [HTIndex] = {RETURN [IDRest]}; FFrope: Rope.Ref = Rope.FromChar[FF]; Atom: PUBLIC PROC RETURNS [token: P1.Token] = { OPEN token; DO CRcount: NAT ← 0; IF char IN [NUL..' ] THEN { inBravo: BOOLEAN ← FALSE; scan: PROC [c: CHARACTER] RETURNS [BOOLEAN] = { IF c > 40C AND NOT inBravo THEN {char ← c; RETURN [TRUE]}; tPos ← tPos + 1; SELECT c FROM CR => {IF inBravo THEN inBravo ← FALSE; CRcount ← CRcount + 1; PPCommentTable.AddBreakHint[tPos]}; FF => {PPCommentTable.AddComment[tPos, FFrope, lastToken, CRcount]; CRcount ← 0}; ControlZ => {inBravo ← TRUE}; ENDCASE; RETURN [FALSE]; }; IF NOT rf.Map[tPos, tLimit, scan] THEN GO TO EndFile; }; toklen ← 0; token.index ← tPos; token.value ← P1.NullValue; IF CRcount > 1 THEN -- remember extra blank lines {PPCommentTable.AddComment[tPos-1, NIL, lastToken, CRcount - 1]; CRcount ← 1}; SELECT char FROM 'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm, 'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z => {scan: PROC [c: CHARACTER] RETURNS [BOOLEAN] = { SELECT c FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {}; ENDCASE => {char ← c; RETURN [TRUE]}; toklen ← toklen + 1; RETURN [FALSE]}; FirstChar[]; char ← 0C; [] ← rf.Map[tPos+1, tLimit, scan]; tPos ← tPos + toklen; class ← tokenID; token.value.r ← IdFromBuffer[tokpos]; GO TO GotNext}; 'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M, 'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => {first, last: CARDINAL ← char - 0C; uId: BOOLEAN ← TRUE; scan: PROC [c: CHARACTER] RETURNS [BOOLEAN] = { SELECT c FROM IN ['A..'Z] => last ← c - 0C; IN ['a..'z], IN ['0..'9] => uId ← FALSE; ENDCASE => {char ← c; RETURN [TRUE]}; toklen ← toklen + 1; RETURN [FALSE]}; FirstChar[]; char ← 0C; [] ← rf.Map[tPos+1, tLimit, scan]; tPos ← tPos + toklen; IF uId THEN {j: CARDINAL ← 0; len: NAT ← toklen; h: HashIndex ← (first * 128 - first + last) MOD LAST[HashIndex] + 1; WHILE (j ← hashTab[h].symbol) # 0 DO s2: CARDINAL ← vocabIndex[j - 1]; IF vocabIndex[j] - s2 = len THEN FOR s1: CARDINAL IN [0..len) DO IF rf.Fetch[tokpos+s1] # vocab[s2] THEN EXIT; s2 ← s2 + 1 REPEAT FINISHED => {class ← j; GO TO CheckEnd}; ENDLOOP; IF (h ← hashTab[h].link) = 0 THEN EXIT ENDLOOP}; class ← tokenID; token.value.r ← IdFromBuffer[tokpos]; GO TO GotNext}; '0, '1, '2, '3, '4, '5, '6, '7, '8, '9 => {v, v10, v8: LONG CARDINAL ← 0; exp: INTEGER; valid, valid10, valid8: BOOLEAN ← TRUE; point: BOOLEAN ← FALSE; MaxWord: CARDINAL = LAST[CARDINAL]; WHILE char IN ['0..'9] DO IF valid10 THEN [v10, valid10] ← AppendDigit10[v10, char]; IF valid8 THEN [v8, valid8] ← AppendDigit8[v8, char]; AddCharPlus[] ENDLOOP; IF char = '. THEN {NextChar[]; IF char = '. THEN qDot ← TRUE ELSE {point ← TRUE; AddChar[]; [v10, exp, valid10] ← ScanFraction[v10, valid10]}}; SELECT char FROM 'e, 'E => {AddCharPlus[]; v ← v10; [exp, valid] ← ScanExponent[exp, valid10]; class ← tokenFLNUM}; 'b, 'B => {AddCharPlus[]; v ← v8; [exp, valid] ← ScanScaleFactor[valid8 AND NOT point]; THROUGH [1..exp] WHILE valid DO [v, valid] ← AppendDigit8[v, '0] ENDLOOP; class ← tokenLNUM}; 'c, 'C => {AddCharPlus[]; v ← v8; valid ← valid8 AND NOT point AND v <= 255; class ← tokenCHAR}; 'd, 'D => {AddCharPlus[]; v ← v10; [exp, valid] ← ScanScaleFactor[valid10 AND NOT point]; THROUGH [1..exp] WHILE valid DO [v, valid] ← AppendDigit10[v, '0] ENDLOOP; class ← tokenLNUM}; ENDCASE => {v ← v10; valid ← valid10; class ← IF point THEN tokenFLNUM ELSE tokenLNUM}; SELECT class FROM tokenCHAR => token.value ← EnterLit[v]; tokenFLNUM => [token.value, valid] ← EnterFloating[v, exp, valid]; ENDCASE => IF v > MaxWord THEN token.value ← EnterLongLit[v] ELSE {class ← tokenNUM; token.value ← EnterLit[v]}; IF NOT valid THEN ScanError[number, tokpos]; GO TO GotNext}; ',, ';, ':, '←, '#, '~, '+, '*, '↑, '@, '!, '(, '), '[, '], '{ => {class ← scanTab[char]; GO TO GetNext}; '} => {class ← scanTab[char]; NextChar[]; IF char = '. THEN AccumulateEnding[]; GO TO GotNext}; '' => {AddCharPlus[]; AddChar[]; class ← tokenCHAR; token.value ← EnterLit[char - 0C]; GO TO GetNext}; '" => {quoteSeen: BOOLEAN ← FALSE; DO NextChar[]; SELECT char FROM '" => {NextChar[]; IF char # '" THEN GO TO QuoteEnd; quoteSeen ← TRUE}; ENDCASE; AddChar[]; IF toklen = LAST[NAT] THEN {ScanError[string, token.index]; toklen ← 0}; REPEAT QuoteEnd => NULL ENDLOOP; token.value ← EnterText[quoteSeen]; IF char = 'L THEN {class ← tokenLSTR; GO TO GetNext} ELSE {class ← tokenSTR; GO TO GotNext}}; '$ => {NextChar[]; SELECT char FROM IN ['a..'z], IN ['A..'Z] => NULL; ENDCASE => ScanError[atom, token.index]; DO -- accumulate rest of atom name SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {}; ENDCASE => EXIT; AddCharPlus[] ENDLOOP; class ← tokenATOM; token.value ← EnterText[]; GO TO GotNext}; '- => {-- comment processing pChar: CHARACTER ← NUL; scan: PROC [c: CHARACTER] RETURNS [BOOLEAN] = { toklen ← toklen + 1; IF c = CR THEN {char ← c; RETURN [TRUE]}; IF (c = '- AND pChar = c) THEN {char ← c; RETURN [TRUE]}; pChar ← c; RETURN[FALSE]; }; tokpos ← tPos; NextChar[]; IF char = '> THEN {class ← tokenRARROW; GOTO GetNext}; IF char # '- THEN {class ← tokenMINUS; GO TO GotNext}; toklen ← 2; char ← 0C; [] ← rf.Map[tPos+1, tLimit, scan]; tPos ← tokpos + toklen - 1; {comment: Rope.Text ← BufferToText[]; PPCommentTable.AddComment[tokpos, comment, lastToken, CRcount]; lastToken ← tokpos; IF comment.Fetch[0] # '- OR comment.Fetch[1] # '- THEN ERROR; SELECT char FROM '- => {len: NAT = Inline.LowHalf[Rope.Size[comment]]; IF comment.Fetch[len-1] # '- OR comment.Fetch[len-2] # '- THEN ERROR}; CR => {len: NAT = Inline.LowHalf[Rope.Size[comment]]; IF comment.Fetch[len-1] # CR THEN ERROR}; 0C => {-- end of file }; ENDCASE => ERROR}; IF char # CR THEN NextChar[]}; '| => {NextChar[]; IF char = '| THEN {class ← tokenNDET; GOTO GetNext} ELSE {class ← tokenBAR; GOTO GotNext}}; '/ => {NextChar[]; IF char = '/ THEN {class ← tokenPAR; GOTO GetNext} ELSE {class ← tokenDIV; GOTO GotNext}}; '. => {IF qDot THEN {qDot ← FALSE; class ← tokenDOTS; GO TO GetNext}; NextChar[]; SELECT char FROM '. => {class ← tokenDOTS; GO TO GetNext}; IN ['0..'9] => {v: LONG CARDINAL; exp: INTEGER; valid: BOOLEAN; [v, exp, valid] ← ScanFraction[0, TRUE]; SELECT char FROM 'E, 'e => {NextChar[]; [exp, valid] ← ScanExponent[exp, valid]}; ENDCASE; class ← tokenFLNUM; [token.value, valid] ← EnterFloating[v, exp, valid]; IF NOT valid THEN ScanError[number, token.index]; GO TO GotNext}; ENDCASE => {class ← tokenDOT; GO TO GotNext}}; '= => {NextChar[]; IF char = '> THEN {class ← tokenARROW; GO TO GetNext} ELSE {class ← tokenEQUAL; GO TO GotNext}}; '< => {NextChar[]; IF char = '= THEN {class ← tokenLE; GO TO GetNext} ELSE {class ← tokenLESS; GO TO GotNext}}; '> => {NextChar[]; IF char = '= THEN {class ← tokenGE; GO TO GetNext} ELSE {class ← tokenGREATER; GO TO GotNext}}; ENDCASE => {class ← scanTab[char]; IF class # 0 THEN GO TO GetNext; NextChar[]; ScanError[char, token.index]} REPEAT CheckEnd => IF char = '. AND toklen = 3 AND Rope.Run[rf, tokpos, "END", 0] = 3 THEN AccumulateEnding[]; GetNext => {NextChar[]}; GotNext => NULL; EndFile => {char ← NUL; class ← EndMarker; token.value ← P1.NullValue} ENDLOOP; nTokens ← nTokens + 1; lastToken ← token.index; IF debugging AND token.value.r # NIL THEN { lit: Rope.Ref ← NIL; ref: REF ANY ← token.value.r; TTY.PutChar[tty, 15C]; TTY.PutChar[tty, ' ]; TTY.PutChar[tty, '{ ]; WITH ref SELECT FROM x: REF LTNode => {lit ← x.literal}; x: REF HTNode => {lit ← x.name}; ENDCASE; [] ← Rope.Map[base: lit, action: TTYaction]; TTY.PutChar[tty, '} ]; }; RETURN}; -- numerical conversion Digit: ARRAY CHARACTER ['0..'9] OF CARDINAL = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; AppendDigit10: PROC [v: LONG CARDINAL, digit: CHARACTER ['0..'9]] RETURNS [newV: LONG CARDINAL, valid: BOOLEAN] = { MaxV: LONG CARDINAL = 429496729; -- (2**32-1)/10 MaxD: CARDINAL = 5; -- (2**32-1) MOD 10 d: INTEGER [0..9] = Digit[digit]; valid ← v < MaxV OR v = MaxV AND d <= MaxD; newV ← 10 * v + d; RETURN}; AppendDigit8: PROC [v: LONG CARDINAL, digit: CHARACTER ['0..'9]] RETURNS [newV: LONG CARDINAL, valid: BOOLEAN] = { MaxV: LONG CARDINAL = 3777777777B; -- (2**32-1)/8 MaxD: CARDINAL = 7B; -- (2**32-1) MOD 8 d: INTEGER [0..9] = Digit[digit]; valid ← d < 8 AND (v < MaxV OR v = MaxV AND d <= MaxD); newV ← 8 * v + d; RETURN}; ScanFraction: PROC [v: LONG CARDINAL, valid: BOOLEAN] RETURNS [newV: LONG CARDINAL, exp: INTEGER, newValid: BOOLEAN] = { newV ← v; exp ← 0; newValid ← valid AND char IN ['0..'9]; WHILE char IN ['0..'9] DO IF newValid THEN [newV, newValid] ← AppendDigit10[newV, char]; exp ← exp - 1; AddCharPlus[] ENDLOOP; RETURN}; ScanExponent: PROC [exp: INTEGER, valid: BOOLEAN] RETURNS [newExp: INTEGER, newValid: BOOLEAN] = { op: {plus, minus} ← plus; scale: INTEGER; SELECT char FROM '+ => AddCharPlus[]; '- => {op ← minus; AddCharPlus[]}; ENDCASE; [scale, newValid] ← ScanScaleFactor[valid AND char IN ['0..'9]]; newExp ← IF op = plus THEN exp + scale ELSE exp - scale; RETURN}; ScanScaleFactor: PROC [valid: BOOLEAN] RETURNS [scale: INTEGER, newValid: BOOLEAN] = { Max: INTEGER = (32767 - 9) / 10; -- (2**15-10)/10 newValid ← valid; scale ← 0; WHILE char IN ['0..'9] DO newValid ← valid AND scale <= Max; IF newValid THEN scale ← 10 * scale + Digit[char]; AddCharPlus[] ENDLOOP; RETURN}; BufferToText: PROC RETURNS [r: Rope.Text] = INLINE { RETURN [rf.Flatten[tokpos, toklen]]}; WrapLit: PROC [r: REF ANY] RETURNS [LTIndex] = { RETURN [NEW[LTNode ← [index: tokpos, value: r, literal: BufferToText[]]]]}; EnterLit: PROC [v: LONG CARDINAL] RETURNS [P1.Value] = { vRep: ARRAY INTEGER [0..SIZE[LONG UNSPECIFIED]) OF WORD ← LOOPHOLE[v]; RETURN [[r: WrapLit[NEW[CARDINAL ← vRep[0]]]]]}; EnterLongLit: PROC [v: LONG CARDINAL] RETURNS [P1.Value] = { RETURN [[r: WrapLit[NEW[LONG CARDINAL ← v]]]]}; EnterFloating: PROC [v: LONG CARDINAL, exp: INTEGER, valid: BOOLEAN] RETURNS [value: P1.Value, newValid: BOOLEAN] = { IF v > 2147483647 -- 2**31 - 1 THEN {newValid ← FALSE; v ← 0} ELSE newValid ← valid; value.r ← WrapLit [NEW[REAL ← Real.PairToReal [v, exp ! Real.RealException => {newValid ← FALSE; RESUME [vp]}]]]; RETURN}; -- ending accumulation AccumulateEnding: PROC = { -- accumulate the ending comment, and restore the input context end: Index ← tPos; flushingBravo: BOOLEAN ← FALSE; IF char # '. THEN ERROR; IF PPCommentTable.GetEnding[] > 0 THEN RETURN; -- already processed PPCommentTable.SetEnding[end]; WHILE char = '. AND NOT AtEof[] DO -- flush the ending dots NextChar[] ENDLOOP; toklen ← 0; -- a fresh start on the comment WHILE NOT AtEof[] DO SELECT char FROM ControlZ => flushingBravo ← TRUE; ENDCASE; AddCharPlus[] ENDLOOP; PPCommentTable.AddComment[end + 1, BufferToText[], end, 0]; [] ← ResetScanIndex[end]}; -- string literals EnterText: PROC [removeQuotes: BOOLEAN ← FALSE] RETURNS [P1.Value] = { lti: LTIndex ← WrapLit[NIL]; lti.value ← LOOPHOLE[lti.literal]; RETURN [[r: lti]]}; -- initialization/finalization ScanInit: PUBLIC PROC [table: ParseTable.TableRef] = { hashTab ← @table[table.scanTable.hashTab]; scanTab ← @table[table.scanTable.scanTab]; vocab ← LOOPHOLE[@table[table.scanTable.vocabBody]]; vocabIndex ← @table[table.scanTable.vocabIndex]; rf ← SakuraOps.GetSource[]; tPos ← -1; tLimit ← rf.Size[]; PPCommentTable.Reset[]; lastToken ← 0; NextChar[]; qDot ← FALSE; nTokens ← nErrors ← 0}; ScanReset: PUBLIC PROC RETURNS [CARDINAL, CARDINAL] = { rf ← NIL; RETURN [nTokens, nErrors]}; -- error handling ResetScanIndex: PUBLIC PROC [index: Index] RETURNS [success: BOOLEAN] = { tPos ← index - 1; NextChar[]; RETURN [TRUE]}; ScanError: PROC [code: {number, string, char, atom}, tokenIndex: Index] = { errorStream: IOStream.Handle ← SakuraOps.GetLog[]; nErrors ← nErrors + 1; ErrorContext[errorStream, SELECT code FROM number => "invalid number", string => "string unterminated or too long", char => "invalid character", atom => "invalid atom", ENDCASE => NIL, tokenIndex]; errorStream.PutChar[CR]; }; ErrorContext: PUBLIC PROC [to: IOStream.Handle, message: Rope.Ref, tokenIndex: Index] = { index: Index ← tokenIndex; limit: Index ← tokenIndex - 50; start: Index; IF limit < 0 THEN limit ← 0; to.PutChar[CR]; DO c: CHARACTER ← rf.Fetch[index]; IF c = CR THEN {index ← index+1; EXIT}; IF index = limit THEN EXIT; index ← index - 1; ENDLOOP; start ← index; FOR index IN [start..MIN[start+100,rf.Size[]]) DO c: CHARACTER ← rf.Fetch[index]; IF c=CR THEN EXIT; to.PutChar[c]; ENDLOOP; to.PutChar[CR]; FOR index IN [start..tokenIndex) DO c: CHARACTER ← rf.Fetch[index]; to.PutChar[IF c = TAB THEN TAB ELSE ' ]; ENDLOOP; PutRope[to, "↑ "]; PutRope[to, message]; PutRope[to, " ["]; PutDecimal[to, tokenIndex]; to.PutChar[']]; to.PutChar[CR]}; END.