-- file SakuraScanner.Mesa
-- derived from Compiler>Scanner.Mesa
-- last modified by Satterthwaite, January 12, 1981 12:37 PM
-- last edit by Russ Atkinson, 9-Jul-81 14:23:40
-- last edited by Suzuki, 15-Nov-81 19:37:06
DIRECTORY
IOStream: TYPE
USING [CR, FF, TAB, Handle, PutChar],
Inline: TYPE
USING [LowHalf, HighHalf],
PPCommentTable: TYPE
USING [AddComment, AddBreakHint, GetEnding, Index, Reset, SetEnding],
PPLeaves: TYPE
USING [HTIndex, HTNode, LTIndex, LTNode],
SakuraOps: TYPE
USING [GetLog, GetSource],
PPP1: TYPE
USING [Token, Value, NullValue],
ParseTable: TYPE
USING [TableRef, HashIndex, TSymbol, VocabHashEntry, EndMarker, tokenARROW,
tokenATOM, tokenCHAR, tokenDOT, tokenDOTS, tokenEQUAL, tokenGE,
tokenGREATER, tokenID, tokenLE, tokenLESS, tokenFLNUM, tokenLNUM,
tokenLSTR, tokenMINUS, tokenNUM, tokenSTR, tokenRARROW, tokenBAR,
tokenNDET, tokenPAR, tokenDIV],
SakuraCommon: TYPE USING [tty],
SakuraUtil: TYPE
USING [PutDecimal, PutRope],
Real: TYPE
USING [RealException, PairToReal],
Rope: TYPE
USING [Fetch, Flatten, FromChar, Map, Ref, Run, Size, Text],
TTY: TYPE
USING [Handle, PutChar];
SakuraScanner: PROGRAM
IMPORTS IOStream, PPCommentTable, SakuraOps, SakuraUtil, Inline, Real, Rope, SakuraCommon, TTY
EXPORTS PPLeaves, PPP1
= BEGIN OPEN IOStream, PPLeaves,
ParseTable, P1: PPP1, SakuraUtil;
Index: TYPE = PPCommentTable.Index;
hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry;
scanTab: LONG POINTER TO ARRAY CHARACTER [40C..177C] OF TSymbol;
vocab: LONG STRING;
vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL;
NUL: CHARACTER = 0C;
ControlZ: CHARACTER = 32C; -- Bravo escape char
rf: Rope.Ref ← NIL; -- the source
tPos,tLimit: Index ← 0;
tty: TTY.Handle ← SakuraCommon.tty;
debugging: BOOLEAN ← FALSE;
TTYaction: PROC [c: CHARACTER] RETURNS [BOOLEAN] = {
TTY.PutChar[tty, c]; RETURN [FALSE]};
Shorten: PROC [x: Index] RETURNS [NAT] = INLINE {
IF Inline.HighHalf[x] # 0 THEN ERROR;
RETURN [Inline.LowHalf[x]]};
AtEof: PROC RETURNS [BOOLEAN] = INLINE {
RETURN [char = NUL AND tPos >= tLimit]};
toklen: NAT ← 0; -- current token length
tokpos: Index ← 0; -- source index for start of token
nTokens: CARDINAL; -- token count
nErrors: CARDINAL; -- lexical errors
lastToken: Index ← 0;
FirstChar: PROC [] = INLINE {
tokpos ← tPos; toklen ← 1};
AddChar: PROC [] = INLINE {
IF toklen = 0 THEN tokpos ← tPos;
toklen ← toklen + 1};
AddCharPlus: PROC [] = INLINE {
IF toklen = 0 THEN tokpos ← tPos;
toklen ← toklen + 1;
NextChar[]};
NextChar: PROC = INLINE {
char ← rf.Fetch[tPos ← tPos + 1
! ANY => {char ← NUL; tPos ← tLimit; CONTINUE}]};
char: CHARACTER; -- current (most recently scanned) character
qDot: BOOLEAN; -- used to resolved decimal point vs. interval
IdFromRope: PUBLIC PROC [r: Rope.Ref, index: Index] RETURNS [HTIndex] = {
RETURN [NEW[HTNode ← [index: index, name: r]]]};
IdFromBuffer: PROC [index: Index] RETURNS [HTIndex] = INLINE {
RETURN [IdFromRope[BufferToText[], index]];
};
IdFirst: HTIndex ← IdFromRope["first", LAST[Index]];
IDLock: HTIndex ← IdFromRope["LOCK", LAST[Index]];
IDRest: HTIndex ← IdFromRope["rest", LAST[Index]];
IdOfFirst: PUBLIC PROC RETURNS [HTIndex] = {RETURN [IdFirst]};
IdOfLock: PUBLIC PROC RETURNS [HTIndex] = {RETURN [IDLock]};
IdOfRest: PUBLIC PROC RETURNS [HTIndex] = {RETURN [IDRest]};
FFrope: Rope.Ref = Rope.FromChar[FF];
Atom: PUBLIC PROC RETURNS [token: P1.Token] = {
OPEN token;
DO CRcount: NAT ← 0;
IF char IN [NUL..' ] THEN {
inBravo: BOOLEAN ← FALSE;
scan: PROC [c: CHARACTER] RETURNS [BOOLEAN] = {
IF c > 40C AND NOT inBravo THEN {char ← c; RETURN [TRUE]};
tPos ← tPos + 1;
SELECT c FROM
CR =>
{IF inBravo THEN inBravo ← FALSE;
CRcount ← CRcount + 1;
PPCommentTable.AddBreakHint[tPos]};
FF =>
{PPCommentTable.AddComment[tPos, FFrope, lastToken, CRcount];
CRcount ← 0};
ControlZ =>
{inBravo ← TRUE};
ENDCASE;
RETURN [FALSE];
};
IF NOT rf.Map[tPos, tLimit, scan] THEN GO TO EndFile;
};
toklen ← 0;
token.index ← tPos;
token.value ← P1.NullValue;
IF CRcount > 1 THEN
-- remember extra blank lines
{PPCommentTable.AddComment[tPos-1, NIL, lastToken, CRcount - 1];
CRcount ← 1};
SELECT char FROM
'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm, 'n, 'o, 'p,
'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z =>
{scan: PROC [c: CHARACTER] RETURNS [BOOLEAN] = {
SELECT c FROM
IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {};
ENDCASE => {char ← c; RETURN [TRUE]};
toklen ← toklen + 1;
RETURN [FALSE]};
FirstChar[];
char ← 0C;
[] ← rf.Map[tPos+1, tLimit, scan];
tPos ← tPos + toklen;
class ← tokenID;
token.value.r ← IdFromBuffer[tokpos];
GO TO GotNext};
'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M, 'N, 'O, 'P,
'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z =>
{first, last: CARDINAL ← char - 0C;
uId: BOOLEAN ← TRUE;
scan: PROC [c: CHARACTER] RETURNS [BOOLEAN] = {
SELECT c FROM
IN ['A..'Z] => last ← c - 0C;
IN ['a..'z], IN ['0..'9] => uId ← FALSE;
ENDCASE => {char ← c; RETURN [TRUE]};
toklen ← toklen + 1;
RETURN [FALSE]};
FirstChar[];
char ← 0C;
[] ← rf.Map[tPos+1, tLimit, scan];
tPos ← tPos + toklen;
IF uId THEN
{j: CARDINAL ← 0;
len: NAT ← toklen;
h: HashIndex ← (first * 128 - first + last) MOD LAST[HashIndex] + 1;
WHILE (j ← hashTab[h].symbol) # 0 DO
s2: CARDINAL ← vocabIndex[j - 1];
IF vocabIndex[j] - s2 = len THEN
FOR s1: CARDINAL IN [0..len) DO
IF rf.Fetch[tokpos+s1] # vocab[s2] THEN EXIT;
s2 ← s2 + 1
REPEAT
FINISHED => {class ← j; GO TO CheckEnd};
ENDLOOP;
IF (h ← hashTab[h].link) = 0 THEN EXIT
ENDLOOP};
class ← tokenID;
token.value.r ← IdFromBuffer[tokpos];
GO TO GotNext};
'0, '1, '2, '3, '4, '5, '6, '7, '8, '9 =>
{v, v10, v8: LONG CARDINAL ← 0;
exp: INTEGER;
valid, valid10, valid8: BOOLEAN ← TRUE;
point: BOOLEAN ← FALSE;
MaxWord: CARDINAL = LAST[CARDINAL];
WHILE char IN ['0..'9] DO
IF valid10 THEN [v10, valid10] ← AppendDigit10[v10, char];
IF valid8 THEN [v8, valid8] ← AppendDigit8[v8, char];
AddCharPlus[]
ENDLOOP;
IF char = '. THEN
{NextChar[];
IF char = '.
THEN qDot ← TRUE
ELSE {point ← TRUE; AddChar[];
[v10, exp, valid10] ← ScanFraction[v10, valid10]}};
SELECT char FROM
'e, 'E =>
{AddCharPlus[];
v ← v10; [exp, valid] ← ScanExponent[exp, valid10];
class ← tokenFLNUM};
'b, 'B =>
{AddCharPlus[];
v ← v8;
[exp, valid] ← ScanScaleFactor[valid8 AND NOT point];
THROUGH [1..exp] WHILE valid DO
[v, valid] ← AppendDigit8[v, '0]
ENDLOOP;
class ← tokenLNUM};
'c, 'C =>
{AddCharPlus[];
v ← v8; valid ← valid8 AND NOT point AND v <= 255; class ← tokenCHAR};
'd, 'D =>
{AddCharPlus[];
v ← v10;
[exp, valid] ← ScanScaleFactor[valid10 AND NOT point];
THROUGH [1..exp] WHILE valid DO
[v, valid] ← AppendDigit10[v, '0]
ENDLOOP;
class ← tokenLNUM};
ENDCASE =>
{v ← v10; valid ← valid10;
class ← IF point THEN tokenFLNUM ELSE tokenLNUM};
SELECT class FROM
tokenCHAR => token.value ← EnterLit[v];
tokenFLNUM => [token.value, valid] ← EnterFloating[v, exp, valid];
ENDCASE => IF v > MaxWord
THEN token.value ← EnterLongLit[v]
ELSE {class ← tokenNUM; token.value ← EnterLit[v]};
IF NOT valid THEN ScanError[number, tokpos];
GO TO GotNext};
',, ';, ':, '←, '#, '~, '+, '*, '↑, '@, '!, '(, '), '[, '], '{ =>
{class ← scanTab[char]; GO TO GetNext};
'} => {class ← scanTab[char];
NextChar[];
IF char = '. THEN AccumulateEnding[];
GO TO GotNext};
'' =>
{AddCharPlus[];
AddChar[];
class ← tokenCHAR;
token.value ← EnterLit[char - 0C];
GO TO GetNext};
'" =>
{quoteSeen: BOOLEAN ← FALSE;
DO NextChar[];
SELECT char FROM
'" =>
{NextChar[];
IF char # '" THEN GO TO QuoteEnd;
quoteSeen ← TRUE};
ENDCASE;
AddChar[];
IF toklen = LAST[NAT] THEN
{ScanError[string, token.index]; toklen ← 0};
REPEAT
QuoteEnd => NULL
ENDLOOP;
token.value ← EnterText[quoteSeen];
IF char = 'L
THEN {class ← tokenLSTR; GO TO GetNext}
ELSE {class ← tokenSTR; GO TO GotNext}};
'$ =>
{NextChar[];
SELECT char FROM
IN ['a..'z], IN ['A..'Z] => NULL;
ENDCASE => ScanError[atom, token.index];
DO -- accumulate rest of atom name
SELECT char FROM
IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {};
ENDCASE => EXIT;
AddCharPlus[]
ENDLOOP;
class ← tokenATOM;
token.value ← EnterText[];
GO TO GotNext};
'- => {-- comment processing
pChar: CHARACTER ← NUL;
scan: PROC [c: CHARACTER] RETURNS [BOOLEAN] = {
toklen ← toklen + 1;
IF c = CR THEN
{char ← c; RETURN [TRUE]};
IF (c = '- AND pChar = c) THEN
{char ← c; RETURN [TRUE]};
pChar ← c;
RETURN[FALSE];
};
tokpos ← tPos;
NextChar[];
IF char = '> THEN {class ← tokenRARROW; GOTO GetNext};
IF char # '- THEN {class ← tokenMINUS; GO TO GotNext};
toklen ← 2;
char ← 0C;
[] ← rf.Map[tPos+1, tLimit, scan];
tPos ← tokpos + toklen - 1;
{comment: Rope.Text ← BufferToText[];
PPCommentTable.AddComment[tokpos, comment, lastToken, CRcount];
lastToken ← tokpos;
IF comment.Fetch[0] # '- OR comment.Fetch[1] # '- THEN ERROR;
SELECT char FROM
'- => {len: NAT = Inline.LowHalf[Rope.Size[comment]];
IF comment.Fetch[len-1] # '- OR comment.Fetch[len-2] # '-
THEN ERROR};
CR => {len: NAT = Inline.LowHalf[Rope.Size[comment]];
IF comment.Fetch[len-1] # CR
THEN ERROR};
0C => {-- end of file
};
ENDCASE => ERROR};
IF char # CR THEN NextChar[]};
'| => {NextChar[];
IF char = '| THEN {class ← tokenNDET; GOTO GetNext}
ELSE {class ← tokenBAR; GOTO GotNext}};
'/ => {NextChar[];
IF char = '/ THEN {class ← tokenPAR; GOTO GetNext}
ELSE {class ← tokenDIV; GOTO GotNext}};
'. =>
{IF qDot THEN
{qDot ← FALSE; class ← tokenDOTS; GO TO GetNext};
NextChar[];
SELECT char FROM
'. => {class ← tokenDOTS; GO TO GetNext};
IN ['0..'9] =>
{v: LONG CARDINAL;
exp: INTEGER;
valid: BOOLEAN;
[v, exp, valid] ← ScanFraction[0, TRUE];
SELECT char FROM
'E, 'e => {NextChar[]; [exp, valid] ← ScanExponent[exp, valid]};
ENDCASE;
class ← tokenFLNUM;
[token.value, valid] ← EnterFloating[v, exp, valid];
IF NOT valid THEN ScanError[number, token.index];
GO TO GotNext};
ENDCASE => {class ← tokenDOT; GO TO GotNext}};
'= =>
{NextChar[];
IF char = '>
THEN {class ← tokenARROW; GO TO GetNext}
ELSE {class ← tokenEQUAL; GO TO GotNext}};
'< =>
{NextChar[];
IF char = '=
THEN {class ← tokenLE; GO TO GetNext}
ELSE {class ← tokenLESS; GO TO GotNext}};
'> =>
{NextChar[];
IF char = '=
THEN {class ← tokenGE; GO TO GetNext}
ELSE {class ← tokenGREATER; GO TO GotNext}};
ENDCASE => {class ← scanTab[char];
IF class # 0 THEN GO TO GetNext;
NextChar[];
ScanError[char, token.index]}
REPEAT
CheckEnd =>
IF char = '. AND toklen = 3 AND Rope.Run[rf, tokpos, "END", 0] = 3 THEN
AccumulateEnding[];
GetNext => {NextChar[]};
GotNext => NULL;
EndFile =>
{char ← NUL;
class ← EndMarker;
token.value ← P1.NullValue}
ENDLOOP;
nTokens ← nTokens + 1;
lastToken ← token.index;
IF debugging AND token.value.r # NIL THEN {
lit: Rope.Ref ← NIL;
ref: REF ANY ← token.value.r;
TTY.PutChar[tty, 15C];
TTY.PutChar[tty, ' ];
TTY.PutChar[tty, '{ ];
WITH ref SELECT FROM
x: REF LTNode => {lit ← x.literal};
x: REF HTNode => {lit ← x.name};
ENDCASE;
[] ← Rope.Map[base: lit, action: TTYaction];
TTY.PutChar[tty, '} ];
};
RETURN};
-- numerical conversion
Digit: ARRAY CHARACTER ['0..'9] OF CARDINAL = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
AppendDigit10: PROC [v: LONG CARDINAL, digit: CHARACTER ['0..'9]]
RETURNS [newV: LONG CARDINAL, valid: BOOLEAN] = {
MaxV: LONG CARDINAL = 429496729; -- (2**32-1)/10
MaxD: CARDINAL = 5; -- (2**32-1) MOD 10
d: INTEGER [0..9] = Digit[digit];
valid ← v < MaxV OR v = MaxV AND d <= MaxD;
newV ← 10 * v + d;
RETURN};
AppendDigit8: PROC [v: LONG CARDINAL, digit: CHARACTER ['0..'9]]
RETURNS [newV: LONG CARDINAL, valid: BOOLEAN] = {
MaxV: LONG CARDINAL = 3777777777B; -- (2**32-1)/8
MaxD: CARDINAL = 7B; -- (2**32-1) MOD 8
d: INTEGER [0..9] = Digit[digit];
valid ← d < 8 AND (v < MaxV OR v = MaxV AND d <= MaxD);
newV ← 8 * v + d;
RETURN};
ScanFraction: PROC [v: LONG CARDINAL, valid: BOOLEAN]
RETURNS [newV: LONG CARDINAL, exp: INTEGER, newValid: BOOLEAN] = {
newV ← v;
exp ← 0;
newValid ← valid AND char IN ['0..'9];
WHILE char IN ['0..'9] DO
IF newValid THEN [newV, newValid] ← AppendDigit10[newV, char]; exp ← exp - 1;
AddCharPlus[]
ENDLOOP;
RETURN};
ScanExponent: PROC [exp: INTEGER, valid: BOOLEAN]
RETURNS [newExp: INTEGER, newValid: BOOLEAN] = {
op: {plus, minus} ← plus;
scale: INTEGER;
SELECT char FROM
'+ => AddCharPlus[];
'- => {op ← minus; AddCharPlus[]};
ENDCASE;
[scale, newValid] ← ScanScaleFactor[valid AND char IN ['0..'9]];
newExp ← IF op = plus THEN exp + scale ELSE exp - scale;
RETURN};
ScanScaleFactor: PROC [valid: BOOLEAN]
RETURNS [scale: INTEGER, newValid: BOOLEAN] = {
Max: INTEGER = (32767 - 9) / 10; -- (2**15-10)/10
newValid ← valid; scale ← 0;
WHILE char IN ['0..'9] DO
newValid ← valid AND scale <= Max;
IF newValid THEN scale ← 10 * scale + Digit[char];
AddCharPlus[]
ENDLOOP;
RETURN};
BufferToText: PROC RETURNS [r: Rope.Text] = INLINE {
RETURN [rf.Flatten[tokpos, toklen]]};
WrapLit: PROC [r: REF ANY] RETURNS [LTIndex] = {
RETURN [NEW[LTNode ← [index: tokpos, value: r, literal: BufferToText[]]]]};
EnterLit: PROC [v: LONG CARDINAL] RETURNS [P1.Value] = {
vRep: ARRAY INTEGER [0..SIZE[LONG UNSPECIFIED]) OF WORD ← LOOPHOLE[v];
RETURN [[r: WrapLit[NEW[CARDINAL ← vRep[0]]]]]};
EnterLongLit: PROC [v: LONG CARDINAL] RETURNS [P1.Value] = {
RETURN [[r: WrapLit[NEW[LONG CARDINAL ← v]]]]};
EnterFloating: PROC [v: LONG CARDINAL, exp: INTEGER, valid: BOOLEAN]
RETURNS [value: P1.Value, newValid: BOOLEAN] = {
IF v > 2147483647 -- 2**31 - 1
THEN {newValid ← FALSE; v ← 0}
ELSE newValid ← valid;
value.r ←
WrapLit
[NEW[REAL ← Real.PairToReal
[v, exp
! Real.RealException => {newValid ← FALSE; RESUME [vp]}]]];
RETURN};
-- ending accumulation
AccumulateEnding: PROC = {
-- accumulate the ending comment, and restore the input context
end: Index ← tPos;
flushingBravo: BOOLEAN ← FALSE;
IF char # '. THEN ERROR;
IF PPCommentTable.GetEnding[] > 0 THEN RETURN; -- already processed
PPCommentTable.SetEnding[end];
WHILE char = '. AND NOT AtEof[] DO -- flush the ending dots
NextChar[]
ENDLOOP;
toklen ← 0; -- a fresh start on the comment
WHILE NOT AtEof[] DO
SELECT char FROM
ControlZ => flushingBravo ← TRUE;
ENDCASE;
AddCharPlus[]
ENDLOOP;
PPCommentTable.AddComment[end + 1, BufferToText[], end, 0];
[] ← ResetScanIndex[end]};
-- string literals
EnterText: PROC [removeQuotes: BOOLEAN ← FALSE] RETURNS [P1.Value] = {
lti: LTIndex ← WrapLit[NIL];
lti.value ← LOOPHOLE[lti.literal];
RETURN [[r: lti]]};
-- initialization/finalization
ScanInit: PUBLIC PROC [table: ParseTable.TableRef] = {
hashTab ← @table[table.scanTable.hashTab];
scanTab ← @table[table.scanTable.scanTab];
vocab ← LOOPHOLE[@table[table.scanTable.vocabBody]];
vocabIndex ← @table[table.scanTable.vocabIndex];
rf ← SakuraOps.GetSource[];
tPos ← -1;
tLimit ← rf.Size[];
PPCommentTable.Reset[];
lastToken ← 0;
NextChar[];
qDot ← FALSE;
nTokens ← nErrors ← 0};
ScanReset: PUBLIC PROC RETURNS [CARDINAL, CARDINAL] = {
rf ← NIL;
RETURN [nTokens, nErrors]};
-- error handling
ResetScanIndex: PUBLIC PROC [index: Index] RETURNS [success: BOOLEAN] = {
tPos ← index - 1;
NextChar[];
RETURN [TRUE]};
ScanError: PROC [code: {number, string, char, atom}, tokenIndex: Index] = {
errorStream: IOStream.Handle ← SakuraOps.GetLog[];
nErrors ← nErrors + 1;
ErrorContext[errorStream, SELECT code FROM
number => "invalid number",
string => "string unterminated or too long",
char => "invalid character",
atom => "invalid atom",
ENDCASE => NIL,
tokenIndex];
errorStream.PutChar[CR];
};
ErrorContext: PUBLIC PROC
[to: IOStream.Handle, message: Rope.Ref, tokenIndex: Index] = {
index: Index ← tokenIndex;
limit: Index ← tokenIndex - 50;
start: Index;
IF limit < 0 THEN limit ← 0;
to.PutChar[CR];
DO
c: CHARACTER ← rf.Fetch[index];
IF c = CR THEN {index ← index+1; EXIT};
IF index = limit THEN EXIT;
index ← index - 1;
ENDLOOP;
start ← index;
FOR index IN [start..MIN[start+100,rf.Size[]]) DO
c: CHARACTER ← rf.Fetch[index];
IF c=CR THEN EXIT;
to.PutChar[c];
ENDLOOP;
to.PutChar[CR];
FOR index IN [start..tokenIndex) DO
c: CHARACTER ← rf.Fetch[index];
to.PutChar[IF c = TAB THEN TAB ELSE ' ];
ENDLOOP;
PutRope[to, "↑ "];
PutRope[to, message];
PutRope[to, " ["];
PutDecimal[to, tokenIndex];
to.PutChar[']];
to.PutChar[CR]};
END.