-- PPScanner.Mesa
-- Ed Satterthwaite, January 12, 1981 12:37 PM
-- Russ Atkinson, May 19, 1983 4:10 pm
-- added export of PPComData
DIRECTORY
BBZones USING [GetPrefixedZone],
CedarScanner USING
[CharFromToken, ContentsFromToken, GetClosure, GetProc, GetToken, IntegerOverflow, IntFromToken, RealFromToken, RopeFromToken, Token],
PPComData USING [],
PPCommentTable USING [AddComment, AddBreakHint, Reset, SetEnding],
PPLeaves USING [HTIndex, HTNode, ISEIndex, LTIndex, LTNode],
PPOps USING [GetSource],
PPP1 USING [Token, Value, NullValue],
PPParseTable USING
[Handle, HashIndex, TSymbol, VocabHashEntry, EndMarker, tokenARROW, tokenATOM, tokenCHAR, tokenDOTS, tokenGE, tokenID, tokenLE, tokenFLNUM, tokenLNUM, tokenSTR],
PPUtil USING [ShowChar, ShowCR, ShowRope],
Real USING [RealException],
Rope USING [Equal, Fetch, Flatten, ROPE, Run, Size, Text];
PPScanner: PROGRAM
IMPORTS BBZones, CedarScanner, PPCommentTable, PPOps, PPUtil, Real, Rope
EXPORTS PPComData, PPP1
SHARES Rope
= BEGIN OPEN PPLeaves, PPParseTable, P1: PPP1, PPUtil, Rope;
-- stuff exported to PPComData
idANY: PUBLIC PPLeaves.ISEIndex ← "UNSPECIFIED";
idINT: PUBLIC PPLeaves.ISEIndex ← "INTEGER";
idLOCK: PUBLIC PPLeaves.ISEIndex ← "LOCK";
Init: PUBLIC SAFE PROC = TRUSTED {}; -- intentionally empty; presumably exported so clients can force this module to start -- TODO confirm
-- stuff supporting the scanner
-- (Repair note: "rf: ROPENIL" had lost its "← " during text extraction; restored below.)
hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry; -- reserved-word hash chains (set by ScanInit)
scanTab: LONG POINTER TO ARRAY CHAR [40C..177C] OF TSymbol; -- token class for single-character tokens (set by ScanInit)
vocab: LONG STRING; -- packed reserved-word spellings (set by ScanInit)
vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL; -- offsets into vocab, one per symbol (set by ScanInit)
rf: ROPE ← NIL; -- the source
tLimit: INT ← 0; -- rf.Size[]; scanning stops at this index
pz: ZONE ← BBZones.GetPrefixedZone[]; -- zone for nodes allocated by this module
toklen: NAT ← 0; -- current token length
tokpos: INT ← 0; -- source index for start of token
nTokens: CARDINAL; -- token count
nErrors: CARDINAL; -- lexical errors
lastToken: INT ← 0; -- source index of the previous token (used when attaching comments)
IdFromRope: PROC [r: ROPE, index: INT] RETURNS [HTIndex] = {
-- builds an identifier (hash table) node for rope r at the given source index
RETURN [pz.NEW[HTNode ← [index: index, name: r]]]};
-- distinguished identifiers; LAST[INT] presumably marks "no real source position" -- TODO confirm
IdFirst: HTIndex ← IdFromRope["first", LAST[INT]];
IDLock: HTIndex ← IdFromRope["LOCK", LAST[INT]];
IDRest: HTIndex ← IdFromRope["rest", LAST[INT]];
-- accessors for the distinguished identifiers above
IdOfFirst: PUBLIC SAFE PROC
RETURNS [HTIndex] = TRUSTED {RETURN [IdFirst]};
IdOfLock: PUBLIC SAFE PROC
RETURNS [HTIndex] = TRUSTED {RETURN [IDLock]};
IdOfRest: PUBLIC SAFE PROC
RETURNS [HTIndex] = TRUSTED {RETURN [IDRest]};
Atom: PUBLIC SAFE PROC RETURNS [token: P1.Token] = TRUSTED {
  -- Returns the next parser token from the source rope rf, starting at tokpos.
  -- Comments, form feeds, post-END text, and newer-language keywords are
  -- recorded in PPCommentTable instead of being returned as tokens.
  -- (Repair note: several "x: T ← v" initializers below had lost their "← "
  -- during text extraction; they have been restored.)
  CRcount: NAT ← 0; -- newlines seen since the last token/comment was recorded
  formLim: INT ← -1; -- highest source index already examined for layout hints
  allcaps: BOOL ← TRUE; -- NOTE(review): shadowed in the tokenID arm below; apparently unused at this level
  getChar1: CedarScanner.GetProc = TRUSTED {
    -- character supplier used for scanning proper; as a side effect it records
    -- line breaks and form feeds with PPCommentTable
    c: CHAR ← 0C;
    IF index < tLimit THEN SELECT (c ← rf.Fetch[index]) FROM
      '\n => {
        CRcount ← CRcount + 1;
        IF index > formLim THEN {
          formLim ← index;
          PPCommentTable.AddBreakHint[index];
          };
        };
      '\f => IF index > formLim THEN {
        formLim ← index;
        PPCommentTable.AddComment[index, "\f", lastToken, CRcount];
        CRcount ← 0;
        };
      '& => c ← 'a; -- make '& look like an ordinary letter to the scanner -- TODO confirm intent
      ENDCASE;
    RETURN [c];
    };
  getChar2: CedarScanner.GetProc = TRUSTED {
    -- plain character supplier (no side effects); 0C past the end of the source
    IF index < tLimit THEN RETURN[rf.Fetch[index]] ELSE RETURN [0C];
    };
  get1: CedarScanner.GetClosure ← [getChar1];
  get2: CedarScanner.GetClosure ← [getChar2];
  ctok: CedarScanner.Token;
  DO
    CRcount ← 0;
    ctok ← CedarScanner.GetToken[get1, tokpos];
    token.index ← tokpos ← ctok.start;
    toklen ← ctok.next - tokpos;
    IF CRcount > 0 THEN {
      -- attribute accumulated blank lines to the comment table
      IF tokpos < formLim THEN CRcount ← CRcount - 1;
      IF CRcount > 1 THEN {
        PPCommentTable.AddComment[tokpos, NIL, lastToken, CRcount];
        CRcount ← 0;
        }};
    SELECT ctok.kind FROM
      tokenID => {
        -- an identifier or reserved word
        allcaps: BOOL ← TRUE;
        limit: INT ← ctok.next-1;
        r: ROPE ← NIL;
        token.class ← tokenID;
        FOR i: INT IN [tokpos..limit] DO
          c: CHAR ← rf.Fetch[i];
          IF c NOT IN ['A..'Z] THEN {allcaps ← FALSE; EXIT};
          ENDLOOP;
        IF allcaps THEN {
          -- This could be a reserved word: probe the vocabulary hash table
          first: CARDINAL ← LOOPHOLE[rf.Fetch[tokpos]];
          last: CARDINAL ← LOOPHOLE[rf.Fetch[limit]];
          h: CARDINAL ← (first * 128 - first + last) MOD LAST[HashIndex] + 1;
          j: CARDINAL ← 0;
          len: NAT ← toklen;
          WHILE (j ← hashTab[h].symbol) # 0 DO
            s2: CARDINAL ← vocabIndex[j - 1];
            IF vocabIndex[j] - s2 = len THEN
              FOR s1: CARDINAL IN [0..len) DO
                IF rf.Fetch[tokpos+s1] # vocab[s2] THEN EXIT;
                s2 ← s2 + 1
                REPEAT
                  FINISHED => {token.class ← j; GO TO CheckEnd};
                ENDLOOP;
            IF (h ← hashTab[h].link) = 0 THEN EXIT
            ENDLOOP;
          };
        token.value.r ← IdFromRope[
          r ← CedarScanner.ContentsFromToken[get2, ctok], tokpos];
        IF allcaps THEN
          -- language additions since our parser was created, treat as comments
          SELECT TRUE FROM
            Rope.Equal[r, "CEDAR"], Rope.Equal[r, "TRUSTED"], Rope.Equal[r, "SAFE"], Rope.Equal[r, "UNSAFE"], Rope.Equal[r, "CHECKED"] => {
              tokpos ← ctok.next;
              LOOP};
            ENDCASE;
        };
      tokenINT => {
        -- an INT literal
        ENABLE CedarScanner.IntegerOverflow => GO TO badNumber;
        token.class ← tokenLNUM;
        token.value.r ← WrapLit[pz.NEW[INT ← CedarScanner.IntFromToken[get2, ctok]]];
        };
      tokenREAL => {
        -- a REAL literal
        r: REAL ← 0.0;
        r ← CedarScanner.RealFromToken[
          get2, ctok
          ! Real.RealException => TRUSTED {GO TO badNumber}];
        token.class ← tokenFLNUM;
        token.value.r ← WrapLit[pz.NEW[REAL ← r]];
        };
      tokenROPE => {
        -- a ROPE literal
        token.class ← tokenSTR;
        token.value.r ← WrapLit[pz.NEW[ROPE ← CedarScanner.RopeFromToken[get2, ctok]]];
        };
      tokenCHAR => {
        -- a CHAR literal
        token.class ← tokenCHAR;
        token.value.r ← WrapLit[pz.NEW[CHAR ← CedarScanner.CharFromToken[get2, ctok]]];
        };
      tokenATOM => {
        -- an ATOM literal
        token.class ← tokenATOM;
        token.value.r ← CedarScanner.ContentsFromToken[get2, ctok];
        };
      tokenSINGLE => {
        -- a single-character token; class comes from the scan table
        token.class ← scanTab[rf.Fetch[tokpos]];
        };
      tokenDOUBLE => {
        -- a double-character token, classified by its first character
        c1: CHAR ← rf.Fetch[tokpos];
        SELECT c1 FROM
          '= => token.class ← tokenARROW;
          '< => token.class ← tokenLE;
          '> => token.class ← tokenGE;
          '. => token.class ← tokenDOTS
          ENDCASE => ERROR;
        };
      tokenCOMMENT => {
        -- a comment: record it in the comment table and scan again
        comment: Rope.Text ← CedarScanner.ContentsFromToken[get2, ctok].Flatten[];
        PPCommentTable.AddComment[tokpos, comment, lastToken, CRcount];
        tokpos ← ctok.next;
        LOOP;
        };
      tokenEOF => {
        token.class ← EndMarker;
        token.value ← P1.NullValue;
        };
      tokenERROR => {
        -- ctok.msg describes the scanning error
        ErrorContext[ctok.msg, tokpos];
        };
      ENDCASE => ERROR; -- all cases should have been covered
    EXIT;
    REPEAT
      badNumber => {
        ErrorContext["invalid number", tokpos];
        };
      CheckEnd =>
        -- reserved word found; if it is the final "END." capture the trailing text
        IF rf.Fetch[ctok.next] = '. AND toklen = 3
          AND Rope.Run[rf, tokpos, "END", 0] = 3 THEN {
          -- accumulate the ending comment
          pos: INT ← ctok.next+1;
          PPCommentTable.AddComment[pos, rf.Flatten[pos], tLimit, 0];
          PPCommentTable.SetEnding[pos];
          };
    ENDLOOP;
  -- Every token return must come through here
  nTokens ← nTokens + 1;
  lastToken ← tokpos;
  tokpos ← ctok.next;
  RETURN};
-- numerical conversion
WrapLit: PROC [r: REF ANY] RETURNS [LTIndex] = {
-- wraps a literal value r in a literal-table node, capturing the current token's source spelling
RETURN [pz.NEW[LTNode ← [index: tokpos, value: r, literal: rf.Flatten[tokpos, toklen]]]]};
-- initialization/finalization
ScanInit: PUBLIC SAFE PROC [table: PPParseTable.Handle] = TRUSTED {
-- points the scanner at the parse tables and at the current source
-- (obtained from PPOps), and resets all per-compilation scanner state
hashTab ← @table.scanTable.hashTab;
scanTab ← @table.scanTable.scanTab;
vocab ← LOOPHOLE[@table.scanTable.vocabBody];
vocabIndex ← @table.scanTable.vocabIndex;
rf ← PPOps.GetSource[];
tokpos ← 0;
tLimit ← rf.Size[];
PPCommentTable.Reset[];
lastToken ← 0;
nTokens ← nErrors ← 0};
ScanReset: PUBLIC SAFE PROC
RETURNS [CARDINAL, CARDINAL] = TRUSTED {
-- releases the source rope and reports [token count, lexical error count]
rf ← NIL;
RETURN [nTokens, nErrors]};
-- error handling
ResetScanIndex: PUBLIC SAFE PROC
[index: INT] RETURNS [success: BOOL] = TRUSTED {
-- repositions the scanner to resume at the given source index; always succeeds here
tokpos ← index;
RETURN [TRUE]};
ErrorContext: PUBLIC SAFE PROC
[message: ROPE, tokenIndex: INT] = TRUSTED {
-- counts a lexical error, then displays up to 40 characters of source on each
-- side of tokenIndex with a " *^* " marker just before the offending position,
-- followed by the message; "..." indicates truncated context
low: INT ← tokenIndex - 40;
high: INT ← tokenIndex + 40;
nErrors ← nErrors + 1;
IF low < 0 THEN low ← 0;
IF high >= rf.Size[] THEN high ← rf.Size[]-1;
ShowCR[];
IF low > 0 THEN ShowRope["..."];
FOR i: INT IN [low..high] DO
c: CHAR ← rf.Fetch[i];
IF i = tokenIndex THEN ShowRope[" *^* "];
ShowChar[c];
ENDLOOP;
IF high < rf.Size[]-1 THEN ShowRope["..."];
ShowCR[];
ShowRope[message];
ShowCR[]};
END.