-- file SMScannerImpl.mesa -- derived from Compiler>Scanner.mesa
-- last modified by Satterthwaite, July 8, 1983 12:29 pm
-- last edit by Schmidt, May 3, 1982 4:27 pm

DIRECTORY
  Ascii: TYPE USING [BS, CR, FF, LF, TAB],
  Atom: TYPE USING [MakeAtom],
  IO: TYPE USING [
    STREAM, card, GetChar, GetIndex, EndOf, Put, PutChar, PutF, rope, SetIndex, string],
  SMP1: TYPE --P1-- USING [Token, TValue, nullTValue],
  SMParseTable: TYPE ParseTable USING [
    HashIndex, HashTableRef, IndexTableRef, ScanTableRef, TableRef, TSymbol,
    VocabularyRef, tokenEOF, tokenFILENAME, tokenID, tokenSTR],
  Rope: TYPE USING [Flatten, FromProc, ROPE, Text],
  RopeInline: TYPE USING [NewText],
  SMCommentTable: TYPE USING [Index],
  SMCommentTableOps: TYPE USING [Add, AddBreakHint, Reset],
  SMOps: TYPE USING [MS],
  SMTree: TYPE Tree USING [Name];

SMScannerImpl: CEDAR PROGRAM
  IMPORTS Atoms: Atom, IO, Rope, RopeInline, SMCommentTableOps
  EXPORTS SMP1
  SHARES Rope ~ {
  OPEN Tree~~SMTree, SMParseTable, P1~~SMP1;

  -- table installation

  tablePtr: TableRef;          -- base of the installed parse table
  hashTab: HashTableRef;       -- keyword hash chains (symbol, link per bucket)
  scanTab: ScanTableRef;       -- per-character terminal-symbol classification
  vocab: VocabularyRef;        -- packed keyword spellings
  vocabIndex: IndexTableRef;   -- vocab[vocabIndex[j-1]..vocabIndex[j]) spells keyword j

  -- Caches pointers into the scanner sections of the supplied parse table.
  -- Must be called before Atom is used.
  InstallScanTable: PUBLIC PROC[base: TableRef] ~ TRUSTED {
    tablePtr ← base;
    hashTab ← @tablePtr[tablePtr.scanTable.hashTab];
    scanTab ← @tablePtr[tablePtr.scanTable.scanTab];
    vocab ← LOOPHOLE[@tablePtr[tablePtr.scanTable.vocabBody]];
    vocabIndex ← @tablePtr[tablePtr.scanTable.vocabIndex]};

  -- Maps a character to its terminal-symbol class via the installed scan table.
  CharClass: PROC[c: CHAR] RETURNS [TSymbol] ~ TRUSTED INLINE {
    RETURN [scanTab[c]]};

  -- the global data here is protected by a monitor in SMReaderImpl
  cm: SMOps.MS;                -- current model state (holds comment table, output stream)
  out: IO.STREAM;              -- error-report stream (from cm.out)
  zone: ZONE ← NIL;
  Index: TYPE ~ SMCommentTable.Index;
  NUL: CHAR ~ '\000;           -- sentinel: stream exhausted (see NextChar)
  stream: IO.STREAM ← NIL;     -- source being scanned
  char: CHAR;                  -- current (most recently scanned) character
  tPos: Index ← 0;             -- index of char in stream

  -- True only when the NUL in char really means end of stream
  -- (a literal NUL can also occur in the source).
  AtEof: PROC RETURNS[BOOL] ~ {
    RETURN [char = NUL AND stream.EndOf]};

  toklen: NAT ← 0;             -- current token length
  tokpos: Index ← 0;           -- source index for start of token

  -- Copies the current token (tokpos, toklen) out of the stream as a Text,
  -- restoring the stream position afterwards.
  TokenToText: PROC RETURNS[t: Rope.Text] ~ {
    savePos: Index ~ tPos;
    Get: PROC RETURNS[c: CHAR] ~ { RETURN [stream.GetChar]};
    stream.SetIndex[tokpos];
    t ← Rope.FromProc[toklen, Get].Flatten[];
    stream.SetIndex[savePos+1];  -- +1: char at savePos has already been consumed
    RETURN};

  nTokens: CARDINAL;           -- token count
  nErrors: CARDINAL;           -- lexical errors
  lastToken: Index ← 0;        -- index of the most recently delivered token

  -- Start a token at the current character.
  FirstChar: PROC[] ~ { tokpos ← tPos; toklen ← 1};

  -- Extend the current token (starting it here if empty).
  AddChar: PROC[] ~ {
    IF toklen = 0 THEN tokpos ← tPos;
    toklen ← toklen + 1};

  -- Extend the current token and advance past the character.
  AddCharPlus: PROC[] ~ {
    IF toklen = 0 THEN tokpos ← tPos;
    toklen ← toklen + 1;
    NextChar[]};

  -- Advance to the next source character; any stream failure yields NUL.
  NextChar: PROC ~ {
    tPos ← tPos + 1;
    char ← stream.GetChar[ ! ANY => {char ← NUL; CONTINUE}]};

  IdFromRope: PROC[r: Rope.ROPE] RETURNS[Tree.Name] ~ INLINE {
    RETURN [Atoms.MakeAtom[r]]};

  IdFromBuffer: PROC RETURNS[Tree.Name] ~ {
    RETURN [IdFromRope[TokenToText[]]]};

  -- Feeds successive stream characters to scan until it returns TRUE or the
  -- stream ends; stopped reports whether scan terminated the iteration.
  Map: PROC[scan: PROC[CHAR] RETURNS[BOOL]] RETURNS[stopped: BOOL ← FALSE] ~ {
    UNTIL stopped OR stream.EndOf DO
      c: CHAR ~ stream.GetChar;
      stopped ← scan[c];
      ENDLOOP;
    RETURN};

  -- Delivers the next token: skips whitespace (recording line breaks and
  -- comments in cm.comments when present), then classifies identifiers,
  -- keywords, punctuation, string literals, @file names, and -- comments.
  Atom: PUBLIC PROC RETURNS[token: P1.Token] ~ {
    DO
      CRcount: NAT ← 0;
      IF char IN [NUL..' ] THEN {  -- leading whitespace/control characters
        NULcount: NAT ← 0;
        Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
          -- stop on any printing char, or on a second consecutive NUL (EOF guard)
          IF c > '  OR (c = NUL AND NULcount # 0) THEN {char ← c; RETURN [TRUE]};
          tPos ← tPos + 1;
          NULcount ← 0;
          SELECT c FROM
            Ascii.CR => {
              CRcount ← CRcount + 1;
              IF cm.comments # NIL THEN (cm.comments).AddBreakHint[tPos]};
            Ascii.FF => {
              IF cm.comments # NIL THEN (cm.comments).Add[tPos, "\f", lastToken, CRcount];
              CRcount ← 0};
            NUL => NULcount ← 1;
            ENDCASE;
          RETURN [FALSE]};
        [] ← Scan[char];
        IF ~Map[Scan] OR char = NUL THEN GO TO EndFile};
      toklen ← 0;
      token.index ← tPos;
      token.value ← P1.nullTValue;
      IF CRcount > 1 AND cm.comments # NIL THEN {  -- remember extra blank lines
        (cm.comments).Add[tPos-1, NIL, lastToken, CRcount-1];
        CRcount ← 1};
      SELECT char FROM
        'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm,
        'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z, '& => {
          -- lower-case start: always an identifier
          Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
            SELECT c FROM
              IN ['a..'z], IN ['A..'Z], IN ['0..'9], '& => NULL;
              ENDCASE => {char ← c; RETURN [TRUE]};
            toklen ← toklen + 1;
            RETURN [FALSE]};
          FirstChar[]; char ← '\000;
          [] ← Map[Scan];
          tPos ← tPos + toklen;
          token.class ← tokenID;
          token.value ← IdFromBuffer[];
          GO TO GotNext};
        'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M,
        'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => {
          -- upper-case start: possibly a reserved word (all upper case)
          first, last: CARDINAL ← char.ORD;
          uId: BOOL ← TRUE;  -- remains TRUE iff token is entirely upper case
          Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
            SELECT c FROM
              IN ['A..'Z] => last ← c.ORD;
              IN ['a..'z], IN ['0..'9] => uId ← FALSE;
              ENDCASE => {char ← c; RETURN [TRUE]};
            toklen ← toklen + 1;
            RETURN [FALSE]};
          FirstChar[]; char ← '\000;
          [] ← Map[Scan];
          tPos ← tPos + toklen;
          IF uId THEN TRUSTED {
            -- probe the keyword hash table; hash is from first/last letters
            j: CARDINAL;
            h: HashIndex ← (first * 128 - first + last) MOD HashIndex.LAST + 1;
            WHILE (j ← hashTab[h].symbol) # 0 DO
              s2: CARDINAL ← vocabIndex[j-1];
              IF vocabIndex[j] - s2 = toklen THEN {
                t: Rope.Text ~ TokenToText[];
                FOR s1: CARDINAL IN [0..toklen) DO
                  IF t[s1] # vocab.text[s2] THEN EXIT;
                  s2 ← s2 + 1
                  REPEAT
                    FINISHED => {token.class ← j; GO TO GotNext};  -- keyword match
                  ENDLOOP;
                };
              IF (h ← hashTab[h].link) = 0 THEN EXIT
              ENDLOOP};
          token.class ← tokenID;
          token.value ← IdFromBuffer[];
          GO TO GotNext};
        ',, ';, ':, '., '~, '+, '↑, '*, '/, '\\, '(, '), '[, '], '=, '> => {
          -- single-character punctuation
          token.class ← CharClass[char];
          GO TO GetNext};
        '" => {  -- string literal; "" is a quote image, \ escapes handled by ParseLit
          DO
            NextChar[];
            SELECT char FROM
              '" => {
                NextChar[];
                IF char # '" THEN GO TO QuoteEnd;
                AddChar[]};  -- doubled quote: keep both chars for ParseLit
              '\\ => AddCharPlus[];
              NUL => IF AtEof[] THEN GO TO QuoteEnd;  -- unterminated string
              ENDCASE;
            AddChar[];
            IF toklen = NAT.LAST THEN {  -- token length overflow
              ScanError[string, token.index]; toklen ← 0};
            REPEAT
              QuoteEnd => NULL
            ENDLOOP;
          token.value ← EnterText[];
          token.class ← tokenSTR;
          GO TO GotNext};
        '@ => {  -- file name token
          rbseen: BOOL ← FALSE;  -- avoid parsing too far if ]
          Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
            IF toklen = 1 AND c ~= '[ THEN rbseen ← TRUE;
            SELECT c FROM
              IN ['a..'z], IN ['A..'Z], IN ['0..'9], '., '!, '↑ => NULL;
              '<, '> => rbseen ← TRUE;
              '[, '] =>
                IF ~rbseen
                  THEN {IF c = '] THEN rbseen ← TRUE}
                  ELSE {char ← c; RETURN[TRUE]};
              ENDCASE => {char ← c; RETURN [TRUE]};
            toklen ← toklen + 1;
            RETURN [FALSE]};
          FirstChar[]; char ← '\000;
          [] ← Map[Scan];
          tPos ← tPos + toklen;
          token.class ← tokenFILENAME;
          token.value ← TokenToText[];
          GO TO GotNext};
        '- => {  -- comment processing: -- runs to end of line or closing --
          pChar: CHAR ← NUL;
          Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
            toklen ← toklen + 1;
            IF c = Ascii.CR OR (c = '- AND pChar = c) THEN {
              char ← c; RETURN [TRUE]};
            pChar ← c;
            RETURN[FALSE]};
          token.class ← CharClass['-];
          tokpos ← tPos;
          NextChar[];
          IF char # '- THEN GO TO GotNext;  -- lone '-' is an operator
          toklen ← 2; char ← '\000;
          [] ← Map[Scan];
          tPos ← tokpos + toklen - 1;
          IF cm.comments # NIL THEN {
            comment: Rope.Text ~ TokenToText[];
            (cm.comments).Add[tokpos, comment, lastToken, CRcount]};
          lastToken ← tokpos;
          IF char = '- THEN NextChar[]};  -- consume closing '-' of "--"
        ENDCASE => {
          token.class ← CharClass[char];
          IF token.class # 0 THEN GO TO GetNext;
          NextChar[];
          ScanError[char, token.index]}  -- unclassified character: report and rescan
      REPEAT
        GetNext => NextChar[];
        GotNext => NULL;
        EndFile => {
          char ← NUL;
          token ← [class~tokenEOF, index~tPos, value~P1.nullTValue]}
      ENDLOOP;
    nTokens ← nTokens + 1;
    lastToken ← token.index;
    RETURN};

  -- string literals

  EnterText: PROC[] RETURNS[P1.TValue] ~ {
    RETURN [ParseLit[TokenToText[]]]};

  -- Translates escape sequences (\n \t \b \f \l, octal \ddd, \" \\) and
  -- doubled quotes in a scanned string body.  Returns t unchanged when no
  -- translation is needed; otherwise returns a fresh, possibly shorter Text.
  -- BUG FIX: the output text is now allocated (and the untranslated prefix
  -- copied) only on the FIRST escape; previously every escape reallocated nt
  -- and re-copied the raw prefix, corrupting literals with two or more
  -- escape sequences.
  ParseLit: PROC[t: Rope.Text] RETURNS[Rope.Text] ~ {
    IF t = NIL THEN RETURN [NIL];
    {
    src: INTEGER ← 0;            -- read cursor in t
    dst: INTEGER ← 0;            -- write cursor in nt (once allocated)
    nt: Rope.Text ← NIL;         -- output; NIL until a translation is required
    len: INTEGER ← t.length;
    WHILE src < len DO
      c: CHAR ← t[src];
      src ← src + 1;
      IF c = '\\ OR c = '" THEN {  -- oh well, we need to compress this guy
        cc: CHAR ← c;
        IF nt = NIL THEN {  -- first escape: allocate output, copy literal prefix
          nt ← RopeInline.NewText[len];  -- over stuffed
          dst ← src-1;
          FOR i: INTEGER IN [0..dst) DO nt[i] ← t[i] ENDLOOP};
        IF src < len THEN {cc ← t[src]; src ← src + 1};
        SELECT cc FROM
          'n, 'N, 'r, 'R => c ← Ascii.CR;
          't, 'T => c ← Ascii.TAB;
          'b, 'B => c ← Ascii.BS;
          'f, 'F => c ← Ascii.FF;
          'l, 'L => c ← Ascii.LF;
          IN ['0..'7] => {  -- up to three octal digits
            code: NAT ← cc.ORD - '0.ORD;
            cc ← '\000;
            IF src < len THEN {
              cc ← t[src];
              IF cc IN ['0..'7] THEN {
                src ← src + 1;
                code ← code * 8 + (cc.ORD - '0.ORD);
                cc ← '\000;
                IF src < len THEN {
                  cc ← t[src];
                  IF cc IN ['0..'7] THEN {
                    src ← src + 1;
                    code ← code*8 + (cc.ORD - '0.ORD)}}}};
            c ← VAL[code]};
          ENDCASE => c ← cc};  -- \" and "" both yield one quote; unknown escapes pass through
      IF nt # NIL THEN {nt[dst] ← c; dst ← dst + 1};
      ENDLOOP;
    IF nt = NIL THEN RETURN [t] ELSE {nt.length ← dst; RETURN [nt]};
    };
  };

  -- initialization/finalization

  -- Binds the scanner to a model and source stream and primes the first char.
  ScanInit: PUBLIC PROC[model: SMOps.MS, source: IO.STREAM] ~ {
    cm ← model; out ← model.out; zone ← model.z;
    stream ← source;
    tPos ← stream.GetIndex-1;  -- NextChar below advances to the current index
    IF cm.comments # NIL THEN (cm.comments).Reset;
    lastToken ← 0;
    NextChar[];
    nTokens ← nErrors ← 0};

  -- Releases model references; returns [token count, lexical error count].
  ScanReset: PUBLIC PROC RETURNS[CARDINAL, CARDINAL] ~ {
    cm ← NIL; out ← NIL; zone ← NIL;
    RETURN [nTokens, nErrors]};

  -- error handling

  -- Repositions the scanner so the character at index becomes current.
  ResetScanIndex: PUBLIC PROC[index: Index] RETURNS[success: BOOL←TRUE] ~ {
    stream.SetIndex[index];
    tPos ← index-1;
    NextChar[]};

  -- Counts and reports a lexical error at tokenIndex on the out stream.
  ScanError: PROC[code: {number, string, char, atom}, tokenIndex: Index] ~ {
    nErrors ← nErrors + 1;
    ErrorContext[
      SELECT code FROM
        $number => "invalid number",
        $string => "string unterminated or too long",
        $char => "invalid character",
        $atom => "invalid atom",
        ENDCASE => NIL,
      tokenIndex];
    out.PutChar['\n]};

  -- Prints message plus ~40 characters of source context on each side of
  -- tokenIndex, marking the error position; restores the scan position.
  ErrorContext: PUBLIC PROC[message: Rope.ROPE, tokenIndex: Index] ~ {
    savePos: Index ~ tPos;
    low: Index ~ (IF tokenIndex >= 40 THEN tokenIndex-40 ELSE 0);
    high: Index ~ tokenIndex+40;
    out.PutChar['\n];
    IF low > 0 THEN out.Put[IO.string["..."L]];
    stream.SetIndex[low];
    FOR i: Index IN [low..high] WHILE ~stream.EndOf DO
      c: CHAR ~ stream.GetChar;
      IF i = tokenIndex THEN out.Put[IO.string[" *↑* "L]];
      out.PutChar[c];
      ENDLOOP;
    IF ~stream.EndOf THEN out.Put[IO.string["..."L]];
    out.PutF["\n%s [%d]\n", IO.rope[message], IO.card[tokenIndex]];
    stream.SetIndex[savePos];
    tPos ← savePos-1;
    NextChar[]};

  -- error recovery (only)

  -- Supplies a placeholder value for a token class inserted by the parser.
  TokenValue: PUBLIC PROC[s: TSymbol] RETURNS [P1.TValue] ~ {
    RETURN [
      SELECT s FROM
        tokenID => IdFromRope["&anon"],
        ENDCASE => P1.nullTValue]};

  }.