-- file SMScannerImpl.mesa
-- derived from Compiler>Scanner.mesa
-- last modified by Satterthwaite, July 8, 1983 12:29 pm
-- last edit by Schmidt, May 3, 1982 4:27 pm
DIRECTORY
Ascii: TYPE USING [BS, CR, FF, LF, TAB],
Atom: TYPE USING [MakeAtom],
IO: TYPE USING [
STREAM, card, GetChar, GetIndex, EndOf, Put, PutChar, PutF, rope, SetIndex, string],
SMP1: TYPE --P1-- USING [Token, TValue, nullTValue],
SMParseTable: TYPE ParseTable USING [
HashIndex, HashTableRef, IndexTableRef, ScanTableRef, TableRef, TSymbol, VocabularyRef,
tokenEOF, tokenFILENAME, tokenID, tokenSTR],
Rope: TYPE USING [Flatten, FromProc, ROPE, Text],
RopeInline: TYPE USING [NewText],
SMCommentTable: TYPE USING [Index],
SMCommentTableOps: TYPE USING [Add, AddBreakHint, Reset],
SMOps: TYPE USING [MS],
SMTree: TYPE Tree USING [Name];
SMScannerImpl: CEDAR PROGRAM
IMPORTS Atoms: Atom, IO, Rope, RopeInline, SMCommentTableOps
EXPORTS SMP1
SHARES Rope ~ {
OPEN Tree~~SMTree, SMParseTable, P1~~SMP1;
-- table installation
-- Parse-table pointers, cached by InstallScanTable from a packed table.
tablePtr: TableRef;
hashTab: HashTableRef; -- keyword hash chains (probed in Atom's uppercase case)
scanTab: ScanTableRef; -- per-character terminal codes (see CharClass)
vocab: VocabularyRef; -- packed keyword text
vocabIndex: IndexTableRef; -- start offset of each keyword within vocab.text
-- Records base and caches interior pointers to the embedded scanner subtables.
-- TRUSTED: forms addresses from offsets stored in base.scanTable.
InstallScanTable: PUBLIC PROC[base: TableRef] ~ TRUSTED {
tablePtr ← base;
hashTab ← @tablePtr[tablePtr.scanTable.hashTab];
scanTab ← @tablePtr[tablePtr.scanTable.scanTab];
vocab ← LOOPHOLE[@tablePtr[tablePtr.scanTable.vocabBody]];
vocabIndex ← @tablePtr[tablePtr.scanTable.vocabIndex]};
-- Maps a character to its terminal symbol class via the installed scan table.
CharClass: PROC[c: CHAR] RETURNS [TSymbol] ~ TRUSTED INLINE {
RETURN [scanTab[c]]};
-- the global data here is protected by a monitor in SMReaderImpl
cm: SMOps.MS; -- current model state (set by ScanInit, cleared by ScanReset)
out: IO.STREAM; -- stream for error reports (cm.out)
zone: ZONE ← NIL; -- allocation zone from the model (cm.z)
Index: TYPE ~ SMCommentTable.Index;
NUL: CHAR ~ '\000; -- sentinel stored in char at end of stream (see NextChar)
stream: IO.STREAM ← NIL; -- source text being scanned
char: CHAR; -- current (most recently scanned) character
tPos: Index ← 0; -- index of char in stream
-- TRUE iff the NUL in char marks real end of input, not a NUL byte in the source.
AtEof: PROC RETURNS[BOOL] ~ {
RETURN [char = NUL AND stream.EndOf]};
-- Token accumulation state: the token occupies stream span [tokpos..tokpos+toklen).
toklen: NAT ← 0; -- current token length
tokpos: Index ← 0; -- source index for start of token
TokenToText: PROC RETURNS[t: Rope.Text] ~ { -- copies the token text out of the source stream
-- Rereads the toklen characters starting at tokpos, then restores the stream;
-- the restore target is savePos+1 because once char has been fetched the
-- stream position is one past tPos (see NextChar).
savePos: Index ~ tPos;
Get: PROC RETURNS[c: CHAR] ~ {
RETURN [stream.GetChar]};
stream.SetIndex[tokpos];
t ← Rope.FromProc[toklen, Get].Flatten[];
stream.SetIndex[savePos+1];
RETURN};
nTokens: CARDINAL; -- token count
nErrors: CARDINAL; -- lexical errors
lastToken: Index ← 0; -- index of the previous token (used when attaching comments)
-- Begins a new token consisting (so far) of just the current character.
FirstChar: PROC[] ~ {
toklen ← 1; tokpos ← tPos};
-- Appends the current character to the token; starts the token here if it is empty.
AddChar: PROC[] ~ {
IF toklen = 0 THEN tokpos ← tPos;
toklen ← toklen + 1};
-- Appends the current character to the token, then advances to the next one.
AddCharPlus: PROC[] ~ {
AddChar[]; NextChar[]};
-- Advances to the next source character; any signal from GetChar (notably
-- end of stream) leaves the NUL sentinel in char. tPos tracks char's index.
NextChar: PROC ~ {
tPos ← tPos + 1;
char ← stream.GetChar[ ! ANY => {char ← NUL; CONTINUE}]};
-- Interns an identifier's text as an atom.
IdFromRope: PROC[r: Rope.ROPE] RETURNS[Tree.Name] ~ INLINE {
RETURN [Atoms.MakeAtom[r]]};
-- Interns the current token's text as an atom.
IdFromBuffer: PROC RETURNS[Tree.Name] ~ {
RETURN [IdFromRope[TokenToText[]]]};
-- Feeds successive stream characters to scan until scan answers TRUE (result
-- TRUE) or the stream runs dry first (result FALSE); scan sees each character
-- exactly once.
Map: PROC[scan: PROC[CHAR] RETURNS[BOOL]] RETURNS[stopped: BOOL ← FALSE] ~ {
WHILE ~stopped AND ~stream.EndOf DO
stopped ← scan[stream.GetChar];
ENDLOOP;
RETURN};
-- Scans and returns the next token. On entry char holds the first unexamined
-- character; on exit it holds the first character beyond the returned token.
-- The nested Scan procedures are handed to Map, which feeds them characters
-- straight from the stream; each stores its stopping character into char
-- before answering TRUE.
Atom: PUBLIC PROC RETURNS[token: P1.Token] ~ {
DO
CRcount: NAT ← 0;
-- skip whitespace and control characters, counting line breaks for the
-- comment table; two consecutive NULs are taken as end of input
IF char IN [NUL..' ] THEN {
NULcount: NAT ← 0;
Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
IF c > ' OR (c = NUL AND NULcount # 0) THEN {char ← c; RETURN [TRUE]};
tPos ← tPos + 1; NULcount ← 0;
SELECT c FROM
Ascii.CR => {
CRcount ← CRcount + 1;
IF cm.comments # NIL THEN (cm.comments).AddBreakHint[tPos]};
Ascii.FF => {
IF cm.comments # NIL THEN (cm.comments).Add[tPos, "\f", lastToken, CRcount];
CRcount ← 0};
NUL => NULcount ← 1;
ENDCASE;
RETURN [FALSE]};
[] ← Scan[char];
IF ~Map[Scan] OR char = NUL THEN GO TO EndFile};
toklen ← 0;
token.index ← tPos;
token.value ← P1.nullTValue;
IF CRcount > 1 AND cm.comments # NIL THEN { -- remember extra blank lines
(cm.comments).Add[tPos-1, NIL, lastToken, CRcount-1];
CRcount ← 1};
SELECT char FROM
-- lowercase start: plain identifier (keywords are all uppercase, no lookup)
'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm,
'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z, '& => {
Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
SELECT c FROM
IN ['a..'z], IN ['A..'Z], IN ['0..'9], '& => NULL;
ENDCASE => {char ← c; RETURN [TRUE]};
toklen ← toklen + 1;
RETURN [FALSE]};
FirstChar[];
char ← '\000; -- if Map hits end of stream char keeps the NUL sentinel
[] ← Map[Scan];
tPos ← tPos + toklen;
token.class ← tokenID;
token.value ← IdFromBuffer[];
GO TO GotNext};
-- uppercase start: identifier, possibly a keyword if entirely uppercase
'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M,
'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => {
first, last: CARDINAL ← char.ORD;
uId: BOOL ← TRUE; -- stays TRUE iff every character is uppercase
Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
SELECT c FROM
IN ['A..'Z] => last ← c.ORD;
IN ['a..'z], IN ['0..'9] => uId ← FALSE;
ENDCASE => {char ← c; RETURN [TRUE]};
toklen ← toklen + 1;
RETURN [FALSE]};
FirstChar[];
char ← '\000;
[] ← Map[Scan];
tPos ← tPos + toklen;
IF uId THEN TRUSTED {
-- probe the keyword hash table (hashed on first char, last char);
-- falls through to tokenID when no keyword matches
j: CARDINAL;
h: HashIndex ← (first * 128 - first + last) MOD HashIndex.LAST + 1;
WHILE (j ← hashTab[h].symbol) # 0 DO
s2: CARDINAL ← vocabIndex[j-1];
IF vocabIndex[j] - s2 = toklen THEN {
t: Rope.Text ~ TokenToText[];
FOR s1: CARDINAL IN [0..toklen) DO
IF t[s1] # vocab.text[s2] THEN EXIT;
s2 ← s2 + 1
REPEAT
FINISHED => {token.class ← j; GO TO GotNext}; -- keyword found
ENDLOOP;
};
IF (h ← hashTab[h].link) = 0 THEN EXIT
ENDLOOP};
token.class ← tokenID;
token.value ← IdFromBuffer[];
GO TO GotNext};
-- single-character punctuation
',, ';, ':, '., '~, '+, '↑, '*, '/, '\\,
'(, '), '[, '], '=, '> => {
token.class ← CharClass[char]; GO TO GetNext};
-- string literal: a doubled quote embeds a quote, backslash escapes the
-- next character; escapes are kept here and decoded later by ParseLit
'" => {
DO
NextChar[];
SELECT char FROM
'" => {
NextChar[];
IF char # '" THEN GO TO QuoteEnd; -- lone quote ends the literal
AddChar[]};
'\\ => AddCharPlus[];
NUL => IF AtEof[] THEN GO TO QuoteEnd; -- unterminated at end of input
ENDCASE;
AddChar[];
IF toklen = NAT.LAST THEN {
ScanError[string, token.index]; toklen ← 0};
REPEAT
QuoteEnd => NULL
ENDLOOP;
token.value ← EnterText[];
token.class ← tokenSTR;
GO TO GotNext};
-- file name introduced by @
'@ => {
rbseen: BOOL ← FALSE; -- avoid parsing too far if ]
Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
IF toklen = 1 AND c ~= '[ THEN rbseen ← TRUE; -- brackets allowed only if the name opens with [
SELECT c FROM
IN ['a..'z], IN ['A..'Z], IN ['0..'9], '., '!, '↑ => NULL;
'<, '> => rbseen ← TRUE;
'[, '] =>
IF ~rbseen THEN {IF c = '] THEN rbseen ← TRUE}
ELSE {char ← c; RETURN[TRUE]};
ENDCASE => {char ← c; RETURN [TRUE]};
toklen ← toklen + 1;
RETURN [FALSE]};
FirstChar[];
char ← '\000;
[] ← Map[Scan];
tPos ← tPos + toklen;
token.class ← tokenFILENAME;
token.value ← TokenToText[];
GO TO GotNext};
'- => {-- comment processing
-- a dash pair opens a comment, closed by another dash pair or by the
-- end of the line; the text is recorded in cm.comments, not returned
pChar: CHAR ← NUL; -- previous character, to detect the closing dash pair
Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
toklen ← toklen + 1;
IF c = Ascii.CR OR (c = '- AND pChar = c) THEN {
char ← c; RETURN [TRUE]};
pChar ← c;
RETURN[FALSE]};
token.class ← CharClass['-];
tokpos ← tPos;
NextChar[];
IF char # '- THEN GO TO GotNext; -- a single '- is an ordinary token
toklen ← 2;
char ← '\000;
[] ← Map[Scan];
tPos ← tokpos + toklen - 1;
IF cm.comments # NIL THEN {
comment: Rope.Text ~ TokenToText[];
(cm.comments).Add[tokpos, comment, lastToken, CRcount]};
lastToken ← tokpos;
IF char = '- THEN NextChar[]}; -- step past the trailing dash of the terminator
ENDCASE => {
token.class ← CharClass[char];
IF token.class # 0 THEN GO TO GetNext;
NextChar[];
ScanError[char, token.index]} -- unrecognized character: report, then rescan
REPEAT
GetNext => NextChar[]; -- token ended at char itself; advance beyond it
GotNext => NULL; -- char already holds the first character beyond the token
EndFile => {
char ← NUL;
token ← [class~tokenEOF, index~tPos, value~P1.nullTValue]}
ENDLOOP;
nTokens ← nTokens + 1;
lastToken ← token.index;
RETURN};
-- string literals
-- Decodes the current (string literal) token into its runtime text value.
EnterText: PROC[] RETURNS[P1.TValue] ~ {
RETURN [ParseLit[TokenToText[]]]};
-- Decodes a scanned string-literal body. The scanner leaves backslash escape
-- pairs and doubled quotes in the token; here each '\\ or '" consumes the
-- following character, which is decoded (n/N/r/R => CR, t/T => TAB,
-- b/B => BS, f/F => FF, l/L => LF, one to three octal digits => that code,
-- anything else => itself, so a doubled quote yields a single quote).
-- Returns t itself, uncopied, when it contains no '\\ or '".
-- BUG FIX: the copy buffer was reallocated and the raw prefix recopied on
-- EVERY escape, so a literal with two or more escapes decoded incorrectly
-- (earlier decoded output was discarded). The allocation and prefix copy now
-- happen once, on the first escape only.
ParseLit: PROC[t: Rope.Text] RETURNS[Rope.Text] ~ {
IF t = NIL THEN RETURN [NIL];
{
src: INTEGER ← 0; -- read position in t
dst: INTEGER ← 0; -- write position in nt, once copying has begun
nt: Rope.Text ← NIL; -- decoded copy; NIL until the first escape forces one
len: INTEGER ← t.length;
WHILE src < len DO
c: CHAR ← t[src];
src ← src + 1;
IF c = '\\ OR c = '" THEN {
-- first escape seen: allocate the copy and move the literal prefix over
cc: CHAR ← c;
IF nt = NIL THEN {
nt ← RopeInline.NewText[len]; -- over-allocated; length trimmed on return
dst ← src-1;
FOR i: INTEGER IN [0..dst) DO nt[i] ← t[i] ENDLOOP};
IF src < len THEN {cc ← t[src]; src ← src + 1}; -- the escaped character
SELECT cc FROM
'n, 'N, 'r, 'R => c ← Ascii.CR;
't, 'T => c ← Ascii.TAB;
'b, 'B => c ← Ascii.BS;
'f, 'F => c ← Ascii.FF;
'l, 'L => c ← Ascii.LF;
IN ['0..'7] => { -- up to three octal digits
code: NAT ← cc.ORD - '0.ORD;
cc ← '\000;
IF src < len THEN {
cc ← t[src];
IF cc IN ['0..'7] THEN {
src ← src + 1;
code ← code * 8 + (cc.ORD - '0.ORD);
cc ← '\000;
IF src < len THEN {
cc ← t[src];
IF cc IN ['0..'7] THEN {
src ← src + 1;
code ← code*8 + (cc.ORD - '0.ORD)}}}};
c ← VAL[code]};
ENDCASE => c ← cc}; -- escaped character stands for itself
IF nt # NIL THEN {nt[dst] ← c; dst ← dst + 1};
ENDLOOP;
IF nt = NIL THEN RETURN [t] -- no escapes: share the original text
ELSE {nt.length ← dst; RETURN [nt]};
};
};
-- initialization/finalization
-- Binds the scanner to model and source and primes char with the first
-- character; tPos starts at GetIndex-1 so NextChar leaves tPos = index of char.
ScanInit: PUBLIC PROC[model: SMOps.MS, source: IO.STREAM] ~ {
cm ← model; out ← model.out; zone ← model.z;
stream ← source;
tPos ← stream.GetIndex-1;
IF cm.comments # NIL THEN (cm.comments).Reset;
lastToken ← 0;
NextChar[];
nTokens ← nErrors ← 0};
-- Detaches the scanner from the model and reports token and error counts.
ScanReset: PUBLIC PROC RETURNS[CARDINAL, CARDINAL] ~ {
tokens: CARDINAL ~ nTokens;
errors: CARDINAL ~ nErrors;
zone ← NIL; out ← NIL; cm ← NIL;
RETURN [tokens, errors]};
-- error handling
-- Repositions the scanner at index and refetches char; always reports success.
ResetScanIndex: PUBLIC PROC[index: Index] RETURNS[success: BOOL←TRUE] ~ {
stream.SetIndex[index];
tPos ← index-1; NextChar[]};
-- Counts a lexical error of the given kind and prints it with source context.
ScanError: PROC[code: {number, string, char, atom}, tokenIndex: Index] ~ {
nErrors ← nErrors + 1;
ErrorContext[SELECT code FROM
$number => "invalid number",
$string => "string unterminated or too long",
$char => "invalid character",
$atom => "invalid atom",
ENDCASE => NIL,
tokenIndex];
out.PutChar['\n]};
-- Prints message together with up to 40 source characters on either side of
-- tokenIndex, marking the error position, then restores the stream position
-- and refetches char so scanning can continue.
ErrorContext: PUBLIC PROC[message: Rope.ROPE, tokenIndex: Index] ~ {
savePos: Index ~ tPos;
low: Index ~ (IF tokenIndex >= 40 THEN tokenIndex-40 ELSE 0);
high: Index ~ tokenIndex+40;
out.PutChar['\n];
IF low > 0 THEN out.Put[IO.string["..."L]]; -- context is truncated on the left
stream.SetIndex[low];
FOR i: Index IN [low..high] WHILE ~stream.EndOf DO
c: CHAR ~ stream.GetChar;
IF i = tokenIndex THEN out.Put[IO.string[" *↑* "L]];
out.PutChar[c];
ENDLOOP;
IF ~stream.EndOf THEN out.Put[IO.string["..."L]]; -- and on the right
out.PutF["\n%s [%d]\n", IO.rope[message], IO.card[tokenIndex]];
stream.SetIndex[savePos];
tPos ← savePos-1; NextChar[]};
-- error recovery (only)
-- Supplies a placeholder semantic value when the parser fabricates a token
-- during error recovery; only identifiers need a non-null value.
TokenValue: PUBLIC PROC[s: TSymbol] RETURNS [P1.TValue] ~ {
RETURN [SELECT s FROM
tokenID => IdFromRope["&anon"],
ENDCASE => P1.nullTValue]};
}.