-- PPScanner.Mesa
-- Copyright © 1985 by Xerox Corporation. All rights reserved.
-- Ed Satterthwaite, January 12, 1981 12:37 PM
-- Russ Atkinson, February 12, 1985 3:38:03 pm PST
-- Paul Rovner, September 26, 1983 1:06 pm
DIRECTORY
Convert,
IO,
PPLeaves USING [HTIndex, HTNode, LTIndex, LTNode],
PPP1 USING [Token, Value, NullValue],
PPParseTable USING [Handle, HashIndex, TSymbol, VocabHashEntry, EndMarker, tokenARROW, tokenATOM, tokenCHAR, tokenDOTS, tokenGE, tokenID, tokenLE, tokenFLNUM, tokenLNUM, tokenSTR],
Real USING [RealException],
Rope USING [Fetch, Flatten, Length, ROPE, Size];
PPScanner: PROGRAM
IMPORTS Convert, IO, Real, Rope
EXPORTS PPP1
SHARES Rope
= BEGIN OPEN PPLeaves, PPParseTable, P1: PPP1;
ROPE: TYPE = Rope.ROPE;
STREAM: TYPE = IO.STREAM;

-- stuff supporting the scanner

-- Views into the parse table; all four are set by ScanInit.
hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry ← NIL;
scanTab: LONG POINTER TO ARRAY CHAR [40C..177C] OF TSymbol ← NIL;
vocab: LONG STRING ← NIL;
vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL ← NIL;

rf: ROPE ← NIL; -- the source
rs: STREAM ← NIL; -- the source as stream

toklen: NAT ← 0; -- current token length
tokpos: INT ← 0; -- source index for start of token

-- The counters are reset by ScanInit; they start at zero so that ScanReset
-- reports defined values even if it is called before any ScanInit.
nTokens: CARDINAL ← 0; -- token count
nErrors: CARDINAL ← 0; -- lexical errors

lastToken: INT ← 0; -- tokpos of the token most recently returned by Atom
-- Allocates a fresh identifier node whose name is r and whose source index is index.
IdFromRope: PROC [r: ROPE, index: INT] RETURNS [HTIndex] = {
  node: HTIndex ← NEW[HTNode ← [index: index, name: r]];
  RETURN [node];
  };
-- Predefined identifier nodes, created once when the module starts.
-- They do not come from the source text, so each carries LAST[INT] as its index.
idFirst: HTIndex ← IdFromRope["first", LAST[INT]];
idLock: HTIndex ← IdFromRope["LOCK", LAST[INT]];
idRest: HTIndex ← IdFromRope["rest", LAST[INT]];

-- Exported accessors for the predefined identifier nodes above.
IdOfFirst: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {
  RETURN [idFirst];
  };

IdOfLock: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {
  RETURN [idLock];
  };

IdOfRest: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {
  RETURN [idRest];
  };
-- Atom: scans the next token from the source stream rs (established by ScanInit)
-- and returns it.
--   errPut: stream that receives the text of any lexical error report (via ErrorContext).
--   token: token.class is the parse-table symbol; token.value.r holds an identifier
--     node (from IdFromRope) or a literal node (from WrapLit) where one applies.
-- Side effects: advances rs; updates tokpos, toklen and lastToken; counts the call in
-- nTokens (and, through ErrorContext, any error in nErrors).
-- Running out of input while looking for the start of a token yields EndMarker.
Atom: PUBLIC SAFE PROC [errPut: IO.STREAM] RETURNS [token: P1.Token] = TRUSTED {
  tokenKind: IO.TokenKind;
  rope: ROPE;
  charsSkipped: INT;
  DO
    peek: CHAR ← 0C;
    -- End of input here is the normal end of the scan: report it explicitly as
    -- EndMarker (see endOfSource below) instead of leaving token unassigned.
    [] ← IO.SkipWhitespace[rs, TRUE ! IO.EndOfStream => GO TO endOfSource];
    peek ← IO.PeekChar[rs ! IO.EndOfStream => GO TO endOfSource];
    tokpos ← IO.GetIndex[rs];
    SELECT peek FROM
      '%, '& => {
        -- special case of identifier, don't let IO.GetCedarTokenRope see it!
        DO
          -- consume the current character, then continue while letters or digits follow
          [] ← IO.GetChar[rs];
          peek ← IO.PeekChar[rs ! IO.EndOfStream => EXIT];
          SELECT peek FROM
            IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {};
            ENDCASE => EXIT;
          ENDLOOP;
        toklen ← IO.GetIndex[rs] - tokpos;
        token.class ← tokenID;
        token.value.r ← IdFromRope[rf.Flatten[tokpos, toklen], tokpos];
        EXIT;
        };
      ENDCASE => NULL;
    [tokenKind, rope, charsSkipped] ← IO.GetCedarTokenRope[rs
      !
      IO.Error => {
        ErrorContext["Syntax error", IO.GetIndex[rs], errPut]; EXIT};
      IO.EndOfStream => {
        ErrorContext["Unexpected end of stream", IO.GetIndex[rs], errPut]; EXIT}];
    -- the stream now sits just past the token, so its start is the index minus its length
    toklen ← rope.Length[];
    tokpos ← rs.GetIndex[] - toklen;
    SELECT tokenKind FROM
      tokenID => {
        -- an identifier or reserved word
        allcaps: BOOL ← TRUE;
        token.class ← tokenID;
        FOR i: INT IN [0..rope.Size[]) DO
          IF rope.Fetch[i] NOT IN ['A..'Z] THEN {allcaps ← FALSE; EXIT};
          ENDLOOP;
        IF allcaps THEN {
          -- This could be a reserved word...
          -- Hash on the first and last characters, then walk the hash chain comparing
          -- the token with each candidate spelling stored in vocab.
          first: CARDINAL ← LOOPHOLE[rope.Fetch[0], CARDINAL];
          last: CARDINAL ← LOOPHOLE[rope.Fetch[rope.Size[]-1], CARDINAL];
          h: CARDINAL ← (first * 128 - first + last) MOD LAST[HashIndex] + 1;
          j: CARDINAL ← 0;
          len: NAT ← rope.Size[];
          WHILE (j ← hashTab[h].symbol) # 0 DO
            s2: CARDINAL ← vocabIndex[j - 1];
            IF vocabIndex[j] - s2 = len THEN
              FOR s1: CARDINAL IN [0..len) DO
                IF rope.Fetch[s1] # vocab[s2] THEN EXIT;
                s2 ← s2 + 1;
                REPEAT
                  -- every character matched: the token is reserved word j
                  FINISHED => {token.class ← j; GO TO CheckEnd};
                ENDLOOP;
            IF (h ← hashTab[h].link) = 0 THEN EXIT
            ENDLOOP;
          };
        token.value.r ← IdFromRope[rope, tokpos];
        };
      tokenDECIMAL => {
        -- a DECIMAL literal
        ENABLE Convert.Error => GO TO badNumber;
        token.class ← tokenLNUM;
        token.value.r ← WrapLit[NEW[INT ←
          LOOPHOLE[Convert.CardFromDecimalLiteral[rope]]]];
        };
      tokenOCTAL => {
        -- an OCTAL literal
        ENABLE Convert.Error => GO TO badNumber;
        token.class ← tokenLNUM;
        token.value.r ← WrapLit[NEW[INT ←
          LOOPHOLE[Convert.CardFromOctalLiteral[rope]]]];
        };
      tokenHEX => {
        -- a HEX literal
        -- conversion failures are reported like those of the other integer literals
        ENABLE Convert.Error => GO TO badNumber;
        token.class ← tokenLNUM;
        token.value.r ← WrapLit[NEW[INT ←
          LOOPHOLE[Convert.CardFromHexLiteral[rope]]]];
        };
      tokenREAL => {
        -- a REAL literal
        ENABLE Convert.Error, Real.RealException => TRUSTED{GO TO badNumber};
        token.class ← tokenFLNUM;
        token.value.r ← WrapLit[NEW[REAL ← Convert.RealFromLiteral[rope]]];
        };
      tokenROPE => {
        -- a ROPE literal
        token.class ← tokenSTR;
        token.value.r ← WrapLit[NEW[ROPE ← Convert.RopeFromLiteral[rope]]];
        };
      tokenCHAR => {
        -- a CHAR literal
        token.class ← tokenCHAR;
        token.value.r ← WrapLit[NEW[CHAR ← Convert.CharFromLiteral[rope]]];
        };
      tokenATOM => {
        -- an ATOM literal
        token.class ← tokenATOM;
        token.value.r ← WrapLit[NEW[ATOM ← Convert.AtomFromRope[rope]]];
        };
      tokenSINGLE => {
        -- a single-character token
        token.class ← scanTab[rf.Fetch[tokpos]];
        };
      tokenDOUBLE => {
        -- a double-character token, classified by its first character
        c1: CHAR ← rf.Fetch[tokpos];
        SELECT c1 FROM
          '= => token.class ← tokenARROW;
          '< => token.class ← tokenLE;
          '> => token.class ← tokenGE;
          '. => token.class ← tokenDOTS
          ENDCASE => ERROR;
        };
      tokenCOMMENT => {
        -- a comment: not a token, go around again
        LOOP;
        };
      tokenEOF => {
        token.class ← EndMarker;
        token.value ← P1.NullValue;
        };
      tokenERROR => {
        -- token.msg describes the scanning error
        ErrorContext["Syntax error", tokpos, errPut];
        };
      ENDCASE => ERROR; -- all cases should have been covered
    EXIT;
    REPEAT
      badNumber => {
        ErrorContext["invalid number", tokpos, errPut];
        };
      CheckEnd => {};
      endOfSource => {
        -- same result as the tokenEOF case above
        token.class ← EndMarker;
        token.value ← P1.NullValue;
        };
    ENDLOOP;
  -- Every token return must come through here
  nTokens ← nTokens + 1;
  lastToken ← tokpos;
  RETURN;
  };
-- numerical conversion
-- Wraps the literal value r in a new literal node for the current token: the node
-- records the token's start index (tokpos) and its source text, i.e. the toklen
-- characters of the source rope starting at tokpos.
WrapLit: PROC [r: REF ANY] RETURNS [LTIndex] = {
  sourceText: ROPE ← rf.Flatten[tokpos, toklen];
  RETURN [NEW[LTNode ← [index: tokpos, value: r, literal: sourceText]]];
  };
-- initialization/finalization
-- Prepares the scanner for a new source.
--   table: parse table whose scanTable supplies the hash table, the character
--     table and the vocabulary used by Atom.
--   source: the text to be scanned.
ScanInit: PUBLIC SAFE PROC [table: PPParseTable.Handle, source: ROPE] = TRUSTED {
  -- start counting and positioning from scratch
  nErrors ← 0;
  nTokens ← 0;
  lastToken ← 0;
  tokpos ← 0;
  -- the source, both as a rope and as an input stream over that rope
  rf ← source;
  rs ← IO.RIS[source];
  -- views into the scanner section of the parse table
  hashTab ← @table.scanTable.hashTab;
  scanTab ← @table.scanTable.scanTab;
  vocabIndex ← @table.scanTable.vocabIndex;
  vocab ← LOOPHOLE[@table.scanTable.vocabBody];
  };
-- Ends a scan: drops the references to the source rope and stream, and returns
-- the number of tokens scanned followed by the number of lexical errors.
ScanReset: PUBLIC SAFE PROC
    RETURNS [CARDINAL, CARDINAL] = TRUSTED {
  tokens: CARDINAL = nTokens;
  errors: CARDINAL = nErrors;
  rs ← NIL;
  rf ← NIL;
  RETURN [tokens, errors];
  };
-- error handling
-- Repositions the scanner so that the next call of Atom starts at source index index.
--   success: TRUE when the source stream was repositioned; FALSE when no source is
--     open or the stream rejected the new position.
-- Atom takes its position from the stream rs (it overwrites tokpos from
-- IO.GetIndex[rs]), so the stream itself must be moved; assigning tokpos alone
-- would leave the scan where it was.
-- NOTE(review): assumes the stream made by IO.RIS supports IO.SetIndex - confirm.
ResetScanIndex: PUBLIC SAFE PROC [index: INT] RETURNS [success: BOOL] = TRUSTED {
  IF rs = NIL THEN RETURN [FALSE];
  IO.SetIndex[rs, index ! IO.Error => GO TO cannotReposition];
  tokpos ← index;
  RETURN [TRUE];
  EXITS
    cannotReposition => RETURN [FALSE];
  };
-- Reports a lexical error and counts it in nErrors.
--   message: text of the error, written on a line of its own after the excerpt.
--   tokenIndex: source index of the offending token.
--   put: stream that receives the report.
-- The report is an excerpt of the source reaching up to 40 characters either side
-- of tokenIndex, with " *^* " written just before the character at tokenIndex and
-- "..." at either end where the excerpt stops short of the source boundary.
ErrorContext: PUBLIC SAFE PROC [message: ROPE, tokenIndex: INT, put: IO.STREAM] = TRUSTED {
  size: INT = rf.Size[];
  lo: INT = MAX[tokenIndex - 40, 0];
  hi: INT = MIN[tokenIndex + 40, size - 1];
  nErrors ← nErrors + 1;
  put.PutChar['\n];
  IF lo > 0 THEN put.PutRope["..."];
  FOR pos: INT IN [lo..hi] DO
    IF pos = tokenIndex THEN put.PutRope[" *^* "];
    put.PutChar[rf.Fetch[pos]];
    ENDLOOP;
  IF hi < size - 1 THEN put.PutRope["..."];
  put.PutChar['\n];
  put.PutRope[message];
  put.PutChar['\n];
  };
END.