-- file SakuraScanner.Mesa -- derived from Compiler>Scanner.Mesa -- last modified by Satterthwaite, January 12, 1981 12:37 PM -- last edit by Russ Atkinson, 9-Jul-81 14:23:40 -- last edited by Suzuki, 15-Nov-81 19:37:06 DIRECTORY IOStream: TYPE USING [CR, FF, TAB, Handle, PutChar], Inline: TYPE USING [LowHalf, HighHalf], PPCommentTable: TYPE USING [AddComment, AddBreakHint, GetEnding, Index, Reset, SetEnding], PPLeaves: TYPE USING [HTIndex, HTNode, LTIndex, LTNode], SakuraOps: TYPE USING [GetLog, GetSource], PPP1: TYPE USING [Token, Value, NullValue], ParseTable: TYPE USING [TableRef, HashIndex, TSymbol, VocabHashEntry, EndMarker, tokenARROW, tokenATOM, tokenCHAR, tokenDOT, tokenDOTS, tokenEQUAL, tokenGE, tokenGREATER, tokenID, tokenLE, tokenLESS, tokenFLNUM, tokenLNUM, tokenLSTR, tokenMINUS, tokenNUM, tokenSTR, tokenRARROW, tokenBAR, tokenNDET, tokenPAR, tokenDIV], SakuraCommon: TYPE USING [tty], SakuraUtil: TYPE USING [PutDecimal, PutRope], Real: TYPE USING [RealException, PairToReal], Rope: TYPE USING [Fetch, Flatten, FromChar, Map, Ref, Run, Size, Text], TTY: TYPE USING [Handle, PutChar]; SakuraScanner: PROGRAM IMPORTS IOStream, PPCommentTable, SakuraOps, SakuraUtil, Inline, Real, Rope, SakuraCommon, TTY EXPORTS PPLeaves, PPP1 = BEGIN OPEN IOStream, PPLeaves, ParseTable, P1: PPP1, SakuraUtil; Index: TYPE = PPCommentTable.Index; hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry; scanTab: LONG POINTER TO ARRAY CHARACTER [40C..177C] OF TSymbol; vocab: LONG STRING; vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL; NUL: CHARACTER = 0C; ControlZ: CHARACTER = 32C; -- Bravo escape char rf: Rope.Ref _ NIL; -- the source tPos,tLimit: Index _ 0; tty: TTY.Handle _ SakuraCommon.tty; debugging: BOOLEAN _ FALSE; TTYaction: PROC [c: CHARACTER] RETURNS [BOOLEAN] = { TTY.PutChar[tty, c]; RETURN [FALSE]}; Shorten: PROC [x: Index] RETURNS [NAT] = INLINE { IF Inline.HighHalf[x] # 0 THEN ERROR; RETURN [Inline.LowHalf[x]]}; AtEof: PROC RETURNS [BOOLEAN] = INLINE { RETURN [char = NUL AND tPos >= tLimit]}; toklen: NAT _ 0; -- current token length tokpos: Index _ 0; -- source index for start of token nTokens: CARDINAL; -- token count nErrors: CARDINAL; -- lexical errors lastToken: Index _ 0; FirstChar: PROC [] = INLINE { tokpos _ tPos; toklen _ 1}; AddChar: PROC [] = INLINE { IF toklen = 0 THEN tokpos _ tPos; toklen _ toklen + 1}; AddCharPlus: PROC [] = INLINE { IF toklen = 0 THEN tokpos _ tPos; toklen _ toklen + 1; NextChar[]}; NextChar: PROC = INLINE { char _ rf.Fetch[tPos _ tPos + 1 ! ANY => {char _ NUL; tPos _ tLimit; CONTINUE}]}; char: CHARACTER; -- current (most recently scanned) character qDot: BOOLEAN; -- used to resolved decimal point vs. interval IdFromRope: PUBLIC PROC [r: Rope.Ref, index: Index] RETURNS [HTIndex] = { RETURN [NEW[HTNode _ [index: index, name: r]]]}; IdFromBuffer: PROC [index: Index] RETURNS [HTIndex] = INLINE { RETURN [IdFromRope[BufferToText[], index]]; }; IdFirst: HTIndex _ IdFromRope["first", LAST[Index]]; IDLock: HTIndex _ IdFromRope["LOCK", LAST[Index]]; IDRest: HTIndex _ IdFromRope["rest", LAST[Index]]; IdOfFirst: PUBLIC PROC RETURNS [HTIndex] = {RETURN [IdFirst]}; IdOfLock: PUBLIC PROC RETURNS [HTIndex] = {RETURN [IDLock]}; IdOfRest: PUBLIC PROC RETURNS [HTIndex] = {RETURN [IDRest]}; FFrope: Rope.Ref = Rope.FromChar[FF]; Atom: PUBLIC PROC RETURNS [token: P1.Token] = { OPEN token; DO CRcount: NAT _ 0; IF char IN [NUL..' ] THEN { inBravo: BOOLEAN _ FALSE; scan: PROC [c: CHARACTER] RETURNS [BOOLEAN] = { IF c > 40C AND NOT inBravo THEN {char _ c; RETURN [TRUE]}; tPos _ tPos + 1; SELECT c FROM CR => {IF inBravo THEN inBravo _ FALSE; CRcount _ CRcount + 1; PPCommentTable.AddBreakHint[tPos]}; FF => {PPCommentTable.AddComment[tPos, FFrope, lastToken, CRcount]; CRcount _ 0}; ControlZ => {inBravo _ TRUE}; ENDCASE; RETURN [FALSE]; }; IF NOT rf.Map[tPos, tLimit, scan] THEN GO TO EndFile; }; toklen _ 0; token.index _ tPos; token.value _ P1.NullValue; IF CRcount > 1 THEN -- remember extra blank lines {PPCommentTable.AddComment[tPos-1, NIL, lastToken, CRcount - 1]; CRcount _ 1}; SELECT char FROM 'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm, 'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z => {scan: PROC [c: CHARACTER] RETURNS [BOOLEAN] = { SELECT c FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {}; ENDCASE => {char _ c; RETURN [TRUE]}; toklen _ toklen + 1; RETURN [FALSE]}; FirstChar[]; char _ 0C; [] _ rf.Map[tPos+1, tLimit, scan]; tPos _ tPos + toklen; class _ tokenID; token.value.r _ IdFromBuffer[tokpos]; GO TO GotNext}; 'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M, 'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => {first, last: CARDINAL _ char - 0C; uId: BOOLEAN _ TRUE; scan: PROC [c: CHARACTER] RETURNS [BOOLEAN] = { SELECT c FROM IN ['A..'Z] => last _ c - 0C; IN ['a..'z], IN ['0..'9] => uId _ FALSE; ENDCASE => {char _ c; RETURN [TRUE]}; toklen _ toklen + 1; RETURN [FALSE]}; FirstChar[]; char _ 0C; [] _ rf.Map[tPos+1, tLimit, scan]; tPos _ tPos + toklen; IF uId THEN {j: CARDINAL _ 0; len: NAT _ toklen; h: HashIndex _ (first * 128 - first + last) MOD LAST[HashIndex] + 1; WHILE (j _ hashTab[h].symbol) # 0 DO s2: CARDINAL _ vocabIndex[j - 1]; IF vocabIndex[j] - s2 = len THEN FOR s1: CARDINAL IN [0..len) DO IF rf.Fetch[tokpos+s1] # vocab[s2] THEN EXIT; s2 _ s2 + 1 REPEAT FINISHED => {class _ j; GO TO CheckEnd}; ENDLOOP; IF (h _ hashTab[h].link) = 0 THEN EXIT ENDLOOP}; class _ tokenID; token.value.r _ IdFromBuffer[tokpos]; GO TO GotNext}; '0, '1, '2, '3, '4, '5, '6, '7, '8, '9 => {v, v10, v8: LONG CARDINAL _ 0; exp: INTEGER; valid, valid10, valid8: BOOLEAN _ TRUE; point: BOOLEAN _ FALSE; MaxWord: CARDINAL = LAST[CARDINAL]; WHILE char IN ['0..'9] DO IF valid10 THEN [v10, valid10] _ AppendDigit10[v10, char]; IF valid8 THEN [v8, valid8] _ AppendDigit8[v8, char]; AddCharPlus[] ENDLOOP; IF char = '. THEN {NextChar[]; IF char = '. THEN qDot _ TRUE ELSE {point _ TRUE; AddChar[]; [v10, exp, valid10] _ ScanFraction[v10, valid10]}}; SELECT char FROM 'e, 'E => {AddCharPlus[]; v _ v10; [exp, valid] _ ScanExponent[exp, valid10]; class _ tokenFLNUM}; 'b, 'B => {AddCharPlus[]; v _ v8; [exp, valid] _ ScanScaleFactor[valid8 AND NOT point]; THROUGH [1..exp] WHILE valid DO [v, valid] _ AppendDigit8[v, '0] ENDLOOP; class _ tokenLNUM}; 'c, 'C => {AddCharPlus[]; v _ v8; valid _ valid8 AND NOT point AND v <= 255; class _ tokenCHAR}; 'd, 'D => {AddCharPlus[]; v _ v10; [exp, valid] _ ScanScaleFactor[valid10 AND NOT point]; THROUGH [1..exp] WHILE valid DO [v, valid] _ AppendDigit10[v, '0] ENDLOOP; class _ tokenLNUM}; ENDCASE => {v _ v10; valid _ valid10; class _ IF point THEN tokenFLNUM ELSE tokenLNUM}; SELECT class FROM tokenCHAR => token.value _ EnterLit[v]; tokenFLNUM => [token.value, valid] _ EnterFloating[v, exp, valid]; ENDCASE => IF v > MaxWord THEN token.value _ EnterLongLit[v] ELSE {class _ tokenNUM; token.value _ EnterLit[v]}; IF NOT valid THEN ScanError[number, tokpos]; GO TO GotNext}; ',, ';, ':, '_, '#, '~, '+, '*, '^, '@, '!, '(, '), '[, '], '{ => {class _ scanTab[char]; GO TO GetNext}; '} => {class _ scanTab[char]; NextChar[]; IF char = '. THEN AccumulateEnding[]; GO TO GotNext}; '' => {AddCharPlus[]; AddChar[]; class _ tokenCHAR; token.value _ EnterLit[char - 0C]; GO TO GetNext}; '" => {quoteSeen: BOOLEAN _ FALSE; DO NextChar[]; SELECT char FROM '" => {NextChar[]; IF char # '" THEN GO TO QuoteEnd; quoteSeen _ TRUE}; ENDCASE; AddChar[]; IF toklen = LAST[NAT] THEN {ScanError[string, token.index]; toklen _ 0}; REPEAT QuoteEnd => NULL ENDLOOP; token.value _ EnterText[quoteSeen]; IF char = 'L THEN {class _ tokenLSTR; GO TO GetNext} ELSE {class _ tokenSTR; GO TO GotNext}}; '$ => {NextChar[]; SELECT char FROM IN ['a..'z], IN ['A..'Z] => NULL; ENDCASE => ScanError[atom, token.index]; DO -- accumulate rest of atom name SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {}; ENDCASE => EXIT; AddCharPlus[] ENDLOOP; class _ tokenATOM; token.value _ EnterText[]; GO TO GotNext}; '- => {-- comment processing pChar: CHARACTER _ NUL; scan: PROC [c: CHARACTER] RETURNS [BOOLEAN] = { toklen _ toklen + 1; IF c = CR THEN {char _ c; RETURN [TRUE]}; IF (c = '- AND pChar = c) THEN {char _ c; RETURN [TRUE]}; pChar _ c; RETURN[FALSE]; }; tokpos _ tPos; NextChar[]; IF char = '> THEN {class _ tokenRARROW; GOTO GetNext}; IF char # '- THEN {class _ tokenMINUS; GO TO GotNext}; toklen _ 2; char _ 0C; [] _ rf.Map[tPos+1, tLimit, scan]; tPos _ tokpos + toklen - 1; {comment: Rope.Text _ BufferToText[]; PPCommentTable.AddComment[tokpos, comment, lastToken, CRcount]; lastToken _ tokpos; IF comment.Fetch[0] # '- OR comment.Fetch[1] # '- THEN ERROR; SELECT char FROM '- => {len: NAT = Inline.LowHalf[Rope.Size[comment]]; IF comment.Fetch[len-1] # '- OR comment.Fetch[len-2] # '- THEN ERROR}; CR => {len: NAT = Inline.LowHalf[Rope.Size[comment]]; IF comment.Fetch[len-1] # CR THEN ERROR}; 0C => {-- end of file }; ENDCASE => ERROR}; IF char # CR THEN NextChar[]}; '| => {NextChar[]; IF char = '| THEN {class _ tokenNDET; GOTO GetNext} ELSE {class _ tokenBAR; GOTO GotNext}}; '/ => {NextChar[]; IF char = '/ THEN {class _ tokenPAR; GOTO GetNext} ELSE {class _ tokenDIV; GOTO GotNext}}; '. => {IF qDot THEN {qDot _ FALSE; class _ tokenDOTS; GO TO GetNext}; NextChar[]; SELECT char FROM '. => {class _ tokenDOTS; GO TO GetNext}; IN ['0..'9] => {v: LONG CARDINAL; exp: INTEGER; valid: BOOLEAN; [v, exp, valid] _ ScanFraction[0, TRUE]; SELECT char FROM 'E, 'e => {NextChar[]; [exp, valid] _ ScanExponent[exp, valid]}; ENDCASE; class _ tokenFLNUM; [token.value, valid] _ EnterFloating[v, exp, valid]; IF NOT valid THEN ScanError[number, token.index]; GO TO GotNext}; ENDCASE => {class _ tokenDOT; GO TO GotNext}}; '= => {NextChar[]; IF char = '> THEN {class _ tokenARROW; GO TO GetNext} ELSE {class _ tokenEQUAL; GO TO GotNext}}; '< => {NextChar[]; IF char = '= THEN {class _ tokenLE; GO TO GetNext} ELSE {class _ tokenLESS; GO TO GotNext}}; '> => {NextChar[]; IF char = '= THEN {class _ tokenGE; GO TO GetNext} ELSE {class _ tokenGREATER; GO TO GotNext}}; ENDCASE => {class _ scanTab[char]; IF class # 0 THEN GO TO GetNext; NextChar[]; ScanError[char, token.index]} REPEAT CheckEnd => IF char = '. AND toklen = 3 AND Rope.Run[rf, tokpos, "END", 0] = 3 THEN AccumulateEnding[]; GetNext => {NextChar[]}; GotNext => NULL; EndFile => {char _ NUL; class _ EndMarker; token.value _ P1.NullValue} ENDLOOP; nTokens _ nTokens + 1; lastToken _ token.index; IF debugging AND token.value.r # NIL THEN { lit: Rope.Ref _ NIL; ref: REF ANY _ token.value.r; TTY.PutChar[tty, 15C]; TTY.PutChar[tty, ' ]; TTY.PutChar[tty, '{ ]; WITH ref SELECT FROM x: REF LTNode => {lit _ x.literal}; x: REF HTNode => {lit _ x.name}; ENDCASE; [] _ Rope.Map[base: lit, action: TTYaction]; TTY.PutChar[tty, '} ]; }; RETURN}; -- numerical conversion Digit: ARRAY CHARACTER ['0..'9] OF CARDINAL = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; AppendDigit10: PROC [v: LONG CARDINAL, digit: CHARACTER ['0..'9]] RETURNS [newV: LONG CARDINAL, valid: BOOLEAN] = { MaxV: LONG CARDINAL = 429496729; -- (2**32-1)/10 MaxD: CARDINAL = 5; -- (2**32-1) MOD 10 d: INTEGER [0..9] = Digit[digit]; valid _ v < MaxV OR v = MaxV AND d <= MaxD; newV _ 10 * v + d; RETURN}; AppendDigit8: PROC [v: LONG CARDINAL, digit: CHARACTER ['0..'9]] RETURNS [newV: LONG CARDINAL, valid: BOOLEAN] = { MaxV: LONG CARDINAL = 3777777777B; -- (2**32-1)/8 MaxD: CARDINAL = 7B; -- (2**32-1) MOD 8 d: INTEGER [0..9] = Digit[digit]; valid _ d < 8 AND (v < MaxV OR v = MaxV AND d <= MaxD); newV _ 8 * v + d; RETURN}; ScanFraction: PROC [v: LONG CARDINAL, valid: BOOLEAN] RETURNS [newV: LONG CARDINAL, exp: INTEGER, newValid: BOOLEAN] = { newV _ v; exp _ 0; newValid _ valid AND char IN ['0..'9]; WHILE char IN ['0..'9] DO IF newValid THEN [newV, newValid] _ AppendDigit10[newV, char]; exp _ exp - 1; AddCharPlus[] ENDLOOP; RETURN}; ScanExponent: PROC [exp: INTEGER, valid: BOOLEAN] RETURNS [newExp: INTEGER, newValid: BOOLEAN] = { op: {plus, minus} _ plus; scale: INTEGER; SELECT char FROM '+ => AddCharPlus[]; '- => {op _ minus; AddCharPlus[]}; ENDCASE; [scale, newValid] _ ScanScaleFactor[valid AND char IN ['0..'9]]; newExp _ IF op = plus THEN exp + scale ELSE exp - scale; RETURN}; ScanScaleFactor: PROC [valid: BOOLEAN] RETURNS [scale: INTEGER, newValid: BOOLEAN] = { Max: INTEGER = (32767 - 9) / 10; -- (2**15-10)/10 newValid _ valid; scale _ 0; WHILE char IN ['0..'9] DO newValid _ valid AND scale <= Max; IF newValid THEN scale _ 10 * scale + Digit[char]; AddCharPlus[] ENDLOOP; RETURN}; BufferToText: PROC RETURNS [r: Rope.Text] = INLINE { RETURN [rf.Flatten[tokpos, toklen]]}; WrapLit: PROC [r: REF ANY] RETURNS [LTIndex] = { RETURN [NEW[LTNode _ [index: tokpos, value: r, literal: BufferToText[]]]]}; EnterLit: PROC [v: LONG CARDINAL] RETURNS [P1.Value] = { vRep: ARRAY INTEGER [0..SIZE[LONG UNSPECIFIED]) OF WORD _ LOOPHOLE[v]; RETURN [[r: WrapLit[NEW[CARDINAL _ vRep[0]]]]]}; EnterLongLit: PROC [v: LONG CARDINAL] RETURNS [P1.Value] = { RETURN [[r: WrapLit[NEW[LONG CARDINAL _ v]]]]}; EnterFloating: PROC [v: LONG CARDINAL, exp: INTEGER, valid: BOOLEAN] RETURNS [value: P1.Value, newValid: BOOLEAN] = { IF v > 2147483647 -- 2**31 - 1 THEN {newValid _ FALSE; v _ 0} ELSE newValid _ valid; value.r _ WrapLit [NEW[REAL _ Real.PairToReal [v, exp ! Real.RealException => {newValid _ FALSE; RESUME [vp]}]]]; RETURN}; -- ending accumulation AccumulateEnding: PROC = { -- accumulate the ending comment, and restore the input context end: Index _ tPos; flushingBravo: BOOLEAN _ FALSE; IF char # '. THEN ERROR; IF PPCommentTable.GetEnding[] > 0 THEN RETURN; -- already processed PPCommentTable.SetEnding[end]; WHILE char = '. AND NOT AtEof[] DO -- flush the ending dots NextChar[] ENDLOOP; toklen _ 0; -- a fresh start on the comment WHILE NOT AtEof[] DO SELECT char FROM ControlZ => flushingBravo _ TRUE; ENDCASE; AddCharPlus[] ENDLOOP; PPCommentTable.AddComment[end + 1, BufferToText[], end, 0]; [] _ ResetScanIndex[end]}; -- string literals EnterText: PROC [removeQuotes: BOOLEAN _ FALSE] RETURNS [P1.Value] = { lti: LTIndex _ WrapLit[NIL]; lti.value _ LOOPHOLE[lti.literal]; RETURN [[r: lti]]}; -- initialization/finalization ScanInit: PUBLIC PROC [table: ParseTable.TableRef] = { hashTab _ @table[table.scanTable.hashTab]; scanTab _ @table[table.scanTable.scanTab]; vocab _ LOOPHOLE[@table[table.scanTable.vocabBody]]; vocabIndex _ @table[table.scanTable.vocabIndex]; rf _ SakuraOps.GetSource[]; tPos _ -1; tLimit _ rf.Size[]; PPCommentTable.Reset[]; lastToken _ 0; NextChar[]; qDot _ FALSE; nTokens _ nErrors _ 0}; ScanReset: PUBLIC PROC RETURNS [CARDINAL, CARDINAL] = { rf _ NIL; RETURN [nTokens, nErrors]}; -- error handling ResetScanIndex: PUBLIC PROC [index: Index] RETURNS [success: BOOLEAN] = { tPos _ index - 1; NextChar[]; RETURN [TRUE]}; ScanError: PROC [code: {number, string, char, atom}, tokenIndex: Index] = { errorStream: IOStream.Handle _ SakuraOps.GetLog[]; nErrors _ nErrors + 1; ErrorContext[errorStream, SELECT code FROM number => "invalid number", string => "string unterminated or too long", char => "invalid character", atom => "invalid atom", ENDCASE => NIL, tokenIndex]; errorStream.PutChar[CR]; }; ErrorContext: PUBLIC PROC [to: IOStream.Handle, message: Rope.Ref, tokenIndex: Index] = { index: Index _ tokenIndex; limit: Index _ tokenIndex - 50; start: Index; IF limit < 0 THEN limit _ 0; to.PutChar[CR]; DO c: CHARACTER _ rf.Fetch[index]; IF c = CR THEN {index _ index+1; EXIT}; IF index = limit THEN EXIT; index _ index - 1; ENDLOOP; start _ index; FOR index IN [start..MIN[start+100,rf.Size[]]) DO c: CHARACTER _ rf.Fetch[index]; IF c=CR THEN EXIT; to.PutChar[c]; ENDLOOP; to.PutChar[CR]; FOR index IN [start..tokenIndex) DO c: CHARACTER _ rf.Fetch[index]; to.PutChar[IF c = TAB THEN TAB ELSE ' ]; ENDLOOP; PutRope[to, "^ "]; PutRope[to, message]; PutRope[to, " ["]; PutDecimal[to, tokenIndex]; to.PutChar[']]; to.PutChar[CR]}; END.