-- PPScanner.Mesa
-- Ed Satterthwaite, January 12, 1981 12:37 PM
-- Russ Atkinson, May 19, 1983 4:10 pm
-- added export of PPComData
DIRECTORY
BBZones USING [GetPrefixedZone],
CedarScanner USING
[CharFromToken, ContentsFromToken, GetClosure, GetProc, GetToken, IntegerOverflow, IntFromToken, RealFromToken, RopeFromToken, Token],
PPComData USING [],
PPCommentTable USING [AddComment, AddBreakHint, Reset, SetEnding],
PPLeaves USING [HTIndex, HTNode, ISEIndex, LTIndex, LTNode],
PPOps USING [GetSource],
PPP1 USING [Token, Value, NullValue],
PPParseTable USING
[Handle, HashIndex, TSymbol, VocabHashEntry, EndMarker, tokenARROW, tokenATOM, tokenCHAR, tokenDOTS, tokenGE, tokenID, tokenLE, tokenFLNUM, tokenLNUM, tokenSTR],
PPUtil USING [ShowChar, ShowCR, ShowRope],
Real USING [RealException],
Rope USING [Equal, Fetch, Flatten, ROPE, Run, Size, Text];
PPScanner: PROGRAM
IMPORTS BBZones, CedarScanner, PPCommentTable, PPOps, PPUtil, Real, Rope
EXPORTS PPComData, PPP1
SHARES Rope
= BEGIN OPEN PPLeaves, PPParseTable, P1: PPP1, PPUtil, Rope;
-- stuff exported to PPComData
idANY: PUBLIC PPLeaves.ISEIndex ← "UNSPECIFIED";
idINT: PUBLIC PPLeaves.ISEIndex ← "INTEGER";
idLOCK: PUBLIC PPLeaves.ISEIndex ← "LOCK";
Init: PUBLIC SAFE PROC = TRUSTED {}; -- intentionally empty; presumably exported so clients can force this module to start -- TODO confirm
-- stuff supporting the scanner
-- (Repair note: "rf: ROPENIL" had lost its "← " during text extraction; restored below.)
hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry; -- reserved-word hash chains (set by ScanInit)
scanTab: LONG POINTER TO ARRAY CHAR [40C..177C] OF TSymbol; -- token class for single-character tokens (set by ScanInit)
vocab: LONG STRING; -- packed reserved-word spellings (set by ScanInit)
vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL; -- offsets into vocab, one per symbol (set by ScanInit)
rf: ROPE ← NIL; -- the source
tLimit: INT ← 0; -- rf.Size[]; scanning stops at this index
pz: ZONE ← BBZones.GetPrefixedZone[]; -- zone for nodes allocated by this module
toklen: NAT ← 0; -- current token length
tokpos: INT ← 0; -- source index for start of token
nTokens: CARDINAL; -- token count
nErrors: CARDINAL; -- lexical errors
lastToken: INT ← 0; -- source index of the previous token (used when attaching comments)
IdFromRope: PROC [r: ROPE, index: INT] RETURNS [HTIndex] = {
-- builds an identifier (hash table) node for rope r at the given source index
RETURN [pz.NEW[HTNode ← [index: index, name: r]]]};
-- distinguished identifiers; LAST[INT] presumably marks "no real source position" -- TODO confirm
IdFirst: HTIndex ← IdFromRope["first", LAST[INT]];
IDLock: HTIndex ← IdFromRope["LOCK", LAST[INT]];
IDRest: HTIndex ← IdFromRope["rest", LAST[INT]];
-- accessors for the distinguished identifiers above
IdOfFirst: PUBLIC SAFE PROC
RETURNS [HTIndex] = TRUSTED {RETURN [IdFirst]};
IdOfLock: PUBLIC SAFE PROC
RETURNS [HTIndex] = TRUSTED {RETURN [IDLock]};
IdOfRest: PUBLIC SAFE PROC
RETURNS [HTIndex] = TRUSTED {RETURN [IDRest]};
Atom: PUBLIC SAFE PROC RETURNS [token: P1.Token] = TRUSTED {
  -- Returns the next parser token from the source rope rf, starting at tokpos.
  -- Comments, form feeds, post-END text, and newer-language keywords are
  -- recorded in PPCommentTable instead of being returned as tokens.
  -- (Repair note: several "x: T ← v" initializers below had lost their "← "
  -- during text extraction; they have been restored.)
  CRcount: NAT ← 0; -- newlines seen since the last token/comment was recorded
  formLim: INT ← -1; -- highest source index already examined for layout hints
  allcaps: BOOL ← TRUE; -- NOTE(review): shadowed in the tokenID arm below; apparently unused at this level
  getChar1: CedarScanner.GetProc = TRUSTED {
    -- character supplier used for scanning proper; as a side effect it records
    -- line breaks and form feeds with PPCommentTable
    c: CHAR ← 0C;
    IF index < tLimit THEN SELECT (c ← rf.Fetch[index]) FROM
      '\n => {
        CRcount ← CRcount + 1;
        IF index > formLim THEN {
          formLim ← index;
          PPCommentTable.AddBreakHint[index];
          };
        };
      '\f => IF index > formLim THEN {
        formLim ← index;
        PPCommentTable.AddComment[index, "\f", lastToken, CRcount];
        CRcount ← 0;
        };
      '& => c ← 'a; -- make '& look like an ordinary letter to the scanner -- TODO confirm intent
      ENDCASE;
    RETURN [c];
    };
  getChar2: CedarScanner.GetProc = TRUSTED {
    -- plain character supplier (no side effects); 0C past the end of the source
    IF index < tLimit THEN RETURN[rf.Fetch[index]] ELSE RETURN [0C];
    };
  get1: CedarScanner.GetClosure ← [getChar1];
  get2: CedarScanner.GetClosure ← [getChar2];
  ctok: CedarScanner.Token;
  DO
    CRcount ← 0;
    ctok ← CedarScanner.GetToken[get1, tokpos];
    token.index ← tokpos ← ctok.start;
    toklen ← ctok.next - tokpos;
    IF CRcount > 0 THEN {
      -- attribute accumulated blank lines to the comment table
      IF tokpos < formLim THEN CRcount ← CRcount - 1;
      IF CRcount > 1 THEN {
        PPCommentTable.AddComment[tokpos, NIL, lastToken, CRcount];
        CRcount ← 0;
        }};
    SELECT ctok.kind FROM
      tokenID => {
        -- an identifier or reserved word
        allcaps: BOOL ← TRUE;
        limit: INT ← ctok.next-1;
        r: ROPE ← NIL;
        token.class ← tokenID;
        FOR i: INT IN [tokpos..limit] DO
          c: CHAR ← rf.Fetch[i];
          IF c NOT IN ['A..'Z] THEN {allcaps ← FALSE; EXIT};
          ENDLOOP;
        IF allcaps THEN {
          -- This could be a reserved word: probe the vocabulary hash table
          first: CARDINAL ← LOOPHOLE[rf.Fetch[tokpos]];
          last: CARDINAL ← LOOPHOLE[rf.Fetch[limit]];
          h: CARDINAL ← (first * 128 - first + last) MOD LAST[HashIndex] + 1;
          j: CARDINAL ← 0;
          len: NAT ← toklen;
          WHILE (j ← hashTab[h].symbol) # 0 DO
            s2: CARDINAL ← vocabIndex[j - 1];
            IF vocabIndex[j] - s2 = len THEN
              FOR s1: CARDINAL IN [0..len) DO
                IF rf.Fetch[tokpos+s1] # vocab[s2] THEN EXIT;
                s2 ← s2 + 1
                REPEAT
                  FINISHED => {token.class ← j; GO TO CheckEnd};
                ENDLOOP;
            IF (h ← hashTab[h].link) = 0 THEN EXIT
            ENDLOOP;
          };
        token.value.r ← IdFromRope[
          r ← CedarScanner.ContentsFromToken[get2, ctok], tokpos];
        IF allcaps THEN
          -- language additions since our parser was created, treat as comments
          SELECT TRUE FROM
            Rope.Equal[r, "CEDAR"], Rope.Equal[r, "TRUSTED"], Rope.Equal[r, "SAFE"], Rope.Equal[r, "UNSAFE"], Rope.Equal[r, "CHECKED"] => {
              tokpos ← ctok.next;
              LOOP};
            ENDCASE;
        };
      tokenINT => {
        -- an INT literal
        ENABLE CedarScanner.IntegerOverflow => GO TO badNumber;
        token.class ← tokenLNUM;
        token.value.r ← WrapLit[pz.NEW[INT ← CedarScanner.IntFromToken[get2, ctok]]];
        };
      tokenREAL => {
        -- a REAL literal
        r: REAL ← 0.0;
        r ← CedarScanner.RealFromToken[
          get2, ctok
          ! Real.RealException => TRUSTED {GO TO badNumber}];
        token.class ← tokenFLNUM;
        token.value.r ← WrapLit[pz.NEW[REAL ← r]];
        };
      tokenROPE => {
        -- a ROPE literal
        token.class ← tokenSTR;
        token.value.r ← WrapLit[pz.NEW[ROPE ← CedarScanner.RopeFromToken[get2, ctok]]];
        };
      tokenCHAR => {
        -- a CHAR literal
        token.class ← tokenCHAR;
        token.value.r ← WrapLit[pz.NEW[CHAR ← CedarScanner.CharFromToken[get2, ctok]]];
        };
      tokenATOM => {
        -- an ATOM literal
        token.class ← tokenATOM;
        token.value.r ← CedarScanner.ContentsFromToken[get2, ctok];
        };
      tokenSINGLE => {
        -- a single-character token; class comes from the scan table
        token.class ← scanTab[rf.Fetch[tokpos]];
        };
      tokenDOUBLE => {
        -- a double-character token, classified by its first character
        c1: CHAR ← rf.Fetch[tokpos];
        SELECT c1 FROM
          '= => token.class ← tokenARROW;
          '< => token.class ← tokenLE;
          '> => token.class ← tokenGE;
          '. => token.class ← tokenDOTS
          ENDCASE => ERROR;
        };
      tokenCOMMENT => {
        -- a comment: record it in the comment table and scan again
        comment: Rope.Text ← CedarScanner.ContentsFromToken[get2, ctok].Flatten[];
        PPCommentTable.AddComment[tokpos, comment, lastToken, CRcount];
        tokpos ← ctok.next;
        LOOP;
        };
      tokenEOF => {
        token.class ← EndMarker;
        token.value ← P1.NullValue;
        };
      tokenERROR => {
        -- ctok.msg describes the scanning error
        ErrorContext[ctok.msg, tokpos];
        };
      ENDCASE => ERROR; -- all cases should have been covered
    EXIT;
    REPEAT
      badNumber => {
        ErrorContext["invalid number", tokpos];
        };
      CheckEnd =>
        -- reserved word found; if it is the final "END." capture the trailing text
        IF rf.Fetch[ctok.next] = '. AND toklen = 3
          AND Rope.Run[rf, tokpos, "END", 0] = 3 THEN {
          -- accumulate the ending comment
          pos: INT ← ctok.next+1;
          PPCommentTable.AddComment[pos, rf.Flatten[pos], tLimit, 0];
          PPCommentTable.SetEnding[pos];
          };
    ENDLOOP;
  -- Every token return must come through here
  nTokens ← nTokens + 1;
  lastToken ← tokpos;
  tokpos ← ctok.next;
  RETURN};
-- numerical conversion
WrapLit: PROC [r: REF ANY] RETURNS [LTIndex] = {
-- wraps a literal value r in a literal-table node, capturing the current token's source spelling
RETURN [pz.NEW[LTNode ← [index: tokpos, value: r, literal: rf.Flatten[tokpos, toklen]]]]};
-- initialization/finalization
ScanInit: PUBLIC SAFE PROC [table: PPParseTable.Handle] = TRUSTED {
-- points the scanner at the parse tables and at the current source
-- (obtained from PPOps), and resets all per-compilation scanner state
hashTab ← @table.scanTable.hashTab;
scanTab ← @table.scanTable.scanTab;
vocab ← LOOPHOLE[@table.scanTable.vocabBody];
vocabIndex ← @table.scanTable.vocabIndex;
rf ← PPOps.GetSource[];
tokpos ← 0;
tLimit ← rf.Size[];
PPCommentTable.Reset[];
lastToken ← 0;
nTokens ← nErrors ← 0};
ScanReset: PUBLIC SAFE PROC
RETURNS [CARDINAL, CARDINAL] = TRUSTED {
-- releases the source rope and reports [token count, lexical error count]
rf ← NIL;
RETURN [nTokens, nErrors]};
-- error handling
ResetScanIndex: PUBLIC SAFE PROC
[index: INT] RETURNS [success: BOOL] = TRUSTED {
-- repositions the scanner to resume at the given source index; always succeeds here
tokpos ← index;
RETURN [TRUE]};
ErrorContext: PUBLIC SAFE PROC
[message: ROPE, tokenIndex: INT] = TRUSTED {
-- counts a lexical error, then displays up to 40 characters of source on each
-- side of tokenIndex with a " *^* " marker just before the offending position,
-- followed by the message; "..." indicates truncated context
low: INT ← tokenIndex - 40;
high: INT ← tokenIndex + 40;
nErrors ← nErrors + 1;
IF low < 0 THEN low ← 0;
IF high >= rf.Size[] THEN high ← rf.Size[]-1;
ShowCR[];
IF low > 0 THEN ShowRope["..."];
FOR i: INT IN [low..high] DO
c: CHAR ← rf.Fetch[i];
IF i = tokenIndex THEN ShowRope[" *^* "];
ShowChar[c];
ENDLOOP;
IF high < rf.Size[]-1 THEN ShowRope["..."];
ShowCR[];
ShowRope[message];
ShowCR[]};
END.