MPScanner.mesa
Copyright Ó 1985, 1986, 1987, 1990, 1991 by Xerox Corporation. All rights reserved.
Russ Atkinson (RRA) May 13, 1987 10:45:07 pm PDT
Last tweaked by Mike Spreitzer on December 2, 1987 4:17:27 pm PST
JKF August 4, 1989 9:44:53 am PDT
Michael Plass, March 22, 1993 4:48 pm PST
DIRECTORY
Convert,
IO,
MPLeaves USING [HTIndex, HTNode, LTIndex, LTNode],
MPP1,
MPParseTable,
Real USING [RealException],
Rope USING [Fetch, Flatten, Length, ROPE, Size];
MPScanner: PROGRAM
IMPORTS Convert, IO, MPParseTable, Real, Rope
EXPORTS MPP1
SHARES Rope
= BEGIN OPEN MPLeaves, MPParseTable, MPP1;
ROPE: TYPE = Rope.ROPE;
STREAM: TYPE = IO.STREAM;
stuff supporting the scanner
hashTab: HashTableRef;
scanTab: ScanTableRef ¬ NIL;
vocab: VocabularyRef ¬ NIL;
vocabIndex: IndexTableRef ¬ NIL;
<<
hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry ¬ NIL;
scanTab: LONG POINTER TO ARRAY CHAR [40C..177C] OF TSymbol ¬ NIL;
vocab: LONG STRING ¬ NIL;
vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL ¬ NIL; >>
rf: ROPE ¬ NIL; -- the source
rs: STREAM ¬ NIL; -- the source as stream
toklen: NAT ¬ 0; -- current token length
tokpos: INT ¬ 0; -- source index for start of token
nTokens: CARDINAL ¬ 0; -- token count
nErrors: CARDINAL; -- lexical errors
lastToken: INT ¬ 0;
IdFromRope: PROC [r: ROPE, index: INT] RETURNS [HTIndex] = {
RETURN [NEW[HTNode ¬ [index: index, name: r]]]};
IdFirst: HTIndex ¬ IdFromRope["first", LAST[INT]];
IDLock: HTIndex ¬ IdFromRope["LOCK", LAST[INT]];
IDRest: HTIndex ¬ IdFromRope["rest", LAST[INT]];
IdOfFirst: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IdFirst]};
IdOfLock: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDLock]};
IdOfRest: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDRest]};
Atom: PUBLIC SAFE PROC [errPut: IO.STREAM] RETURNS [token: MPP1.Token ¬ MPP1.nullToken] = TRUSTED {
tokenKind: IO.TokenKind;
rope: ROPE;
charsSkipped: INT;
DO
peek: CHAR ¬ 0C;
[] ¬ IO.SkipWhitespace[rs, TRUE ! IO.EndOfStream => EXIT];
peek ¬ IO.PeekChar[rs ! IO.EndOfStream => EXIT];
tokpos ¬ IO.GetIndex[rs];
token.index ¬ tokpos;
SELECT peek FROM
'%, '& => {
special case of identifier, don't let IO.GetCedarTokenRope see it!
DO
[] ¬ IO.GetChar[rs];
peek ¬ IO.PeekChar[rs ! IO.EndOfStream => EXIT];
SELECT peek FROM
IN ['a..'z], IN ['A..'Z], IN ['0..'9], '%, '& => {};
ENDCASE => EXIT;
ENDLOOP;
toklen ¬ IO.GetIndex[rs] - tokpos;
token.class ¬ tokenID;
token.value.r ¬ IdFromRope[rf.Flatten[tokpos, toklen], tokpos];
EXIT;
};
ENDCASE;
[tokenKind, rope, charsSkipped] ¬ IO.GetCedarTokenRope[rs
!
IO.Error => {
ErrorContext["Syntax error", IO.GetIndex[rs], errPut]; EXIT};
IO.EndOfStream => {
ErrorContext["Unexpected end of stream", IO.GetIndex[rs], errPut]; EXIT
tokenKind ¬ tokenEOF;
rope ¬ "";
CONTINUE;
}
];
toklen ¬ rope.Length[];
tokpos ¬ rs.GetIndex[] - toklen;
SELECT tokenKind FROM
tokenID => {
an identifier or reserved word
allcaps: BOOL ¬ TRUE;
r: ROPE ¬ NIL;
token.class ¬ tokenID;
FOR i: INT IN [0..rope.Size[]) DO
IF rope.Fetch[i] IN ['a..'z] THEN {allcaps ¬ FALSE; EXIT};
ENDLOOP;
IF allcaps THEN {
This could be a reserved word...
first: CARDINAL ¬ ORD[rope.Fetch[0]];
last: CARDINAL ¬ ORD[rope.Fetch[rope.Size[]-1]];
h: CARDINAL ¬ (first * 128 - first + last) MOD LAST[HashIndex] + 1;
j: CARDINAL ¬ 0;
len: NAT ¬ rope.Size[];
WHILE (j ¬ hashTab[h].symbol) # 0 DO
s2: CARDINAL ¬ vocabIndex[j - 1];
IF vocabIndex[j] - s2 = len THEN
FOR s1: CARDINAL IN [0..len) DO
IF rope.Fetch[s1] # vocab[s2] THEN EXIT;
s2 ¬ s2 + 1;
REPEAT
FINISHED => {token.class ¬ j; GO TO CheckEnd};
ENDLOOP;
IF (h ¬ hashTab[h].link) = 0 THEN EXIT
ENDLOOP;
};
token.value.r ¬ IdFromRope[rope, tokpos];
};
tokenDECIMAL, tokenOCTAL, tokenHEX => {
an OCTAL literal
ENABLE Convert.Error => GO TO badNumber;
token.class ¬ tokenLNUM;
token.value.r ¬ WrapLit[NEW[DINT ¬ Convert.DIntFromRope[rope]]];
};
tokenREAL => {
a REAL literal
ENABLE Convert.Error, Real.RealException => TRUSTED{GO TO badNumber};
token.class ¬ tokenFLNUM;
token.value.r ¬ WrapLit[NEW[REAL ¬ Convert.RealFromLiteral[rope]]];
};
tokenROPE => {
a ROPE literal
token.class ¬ tokenSTR;
token.value.r ¬ WrapLit[NEW[ROPE ¬ Convert.RopeFromLiteral[rope]]];
};
tokenCHAR => {
a CHAR literal
token.class ¬ tokenCHAR;
token.value.r ¬ WrapLit[NEW[CHAR ¬ Convert.CharFromLiteral[rope]]];
};
tokenATOM => {
an ATOM literal
token.class ¬ tokenATOM;
token.value.r ¬ WrapLit[NEW[ATOM ¬ Convert.AtomFromRope[rope]]];
};
tokenSINGLE => {
a single-character token
XeroxOtherDollar: CHAR = 244C; -- ¤
XeroxLeftArrow: CHAR = 254C; -- ¬
XeroxUpArrow: CHAR = 255C; -- ­
XeroxMultiply: CHAR = 264C; -- ´
XeroxDivide: CHAR = 270C; -- ¸
c: CHAR ¬ rf.Fetch[tokpos];
SELECT c FROM
XeroxOtherDollar => c ¬ '$;
XeroxLeftArrow => c ¬ '←
XeroxUpArrow => c ¬ '^;
XeroxMultiply => c ¬ '*;
XeroxDivide => c ¬ '/;
ENDCASE;
IF c = ': AND NOT IO.EndOf[rs] AND IO.PeekChar[rs]='=
THEN {
Should really come through as a tokenDOUBLE; ought to change IOScanImpl.
[] ¬ IO.GetChar[rs];
rope ¬ ":=";
toklen ¬ 2;
token.class ¬ scanTab['←];
}
ELSE { token.class ¬ scanTab[c] };
};
tokenDOUBLE => {
a double-character token
c1: CHAR ¬ rf.Fetch[tokpos];
SELECT c1 FROM
'* => token.class ¬ tokenPOWER; -- for **
'= => token.class ¬ tokenARROW; -- for ==
'< => token.class ¬ tokenLE; -- for <=
'> => token.class ¬ tokenGE; -- for >=
'. => token.class ¬ tokenDOTS; -- for ..
': => token.class ¬ scanTab['←]; -- for :=
'~ => SELECT rf.Fetch[tokpos+1] FROM
'= => token.class ¬ scanTab['#]; -- for ~=
'< => token.class ¬ tokenGE; -- for ~<
'> => token.class ¬ tokenLE; -- for ~>
ENDCASE => GO TO syntaxError;
ENDCASE => GO TO syntaxError;
};
tokenCOMMENT => {
a comment
LOOP;
};
tokenEOF => {
token.class ¬ endMarker;
token.value ¬ MPP1.nullValue;
};
tokenERROR => GO TO syntaxError;
token.msg describes the scanning error
ENDCASE => ERROR; -- all cases should have been covered
EXIT;
REPEAT
syntaxError => {
ErrorContext["Syntax error", tokpos, errPut];
};
badNumber => {
ErrorContext["invalid number", tokpos, errPut];
};
CheckEnd => {};
ENDLOOP;
Every token return must come through here
nTokens ¬ nTokens + 1;
lastToken ¬ tokpos;
RETURN;
};
numerical conversion
WrapLit: PROC [r: REF ANY] RETURNS [LTIndex] = {
RETURN [NEW[LTNode ¬ [index: tokpos, value: r, literal: rf.Flatten[tokpos, toklen]]]]};
initialization/finalization
ScanInit: PUBLIC SAFE PROC [source: ROPE] RETURNS [vIndex: IndexTableRef, vBody: VocabularyRef] = TRUSTED {
IF vocab = NIL THEN {
scanTab ¬ MPParseTable.InitScanTable[];
hashTab ¬ MPParseTable.InitHashTable[];
vocabIndex ¬ MPParseTable.InitIndexTable[];
vocab ¬ MPParseTable.InitVocabulary[];
};
<<
hashTab ¬ @table[table.scanTable.hashTab];
scanTab ¬ @table[table.scanTable.scanTab];
vocab ¬ LOOPHOLE[@table[table.scanTable.vocabBody]];
vocabIndex ¬ @table[table.scanTable.vocabIndex];
>>
rf ¬ source;
rs ¬ IO.RIS[rf];
tokpos ¬ 0;
lastToken ¬ 0;
nTokens ¬ nErrors ¬ 0;
RETURN[vIndex, vBody];
};
ScanReset: PUBLIC SAFE PROC RETURNS [CARDINAL, CARDINAL] = TRUSTED {
rf ¬ NIL;
rs ¬ NIL;
RETURN [nTokens, nErrors];
};
error handling
ResetScanIndex: PUBLIC SAFE PROC [index: INT] RETURNS [success: BOOL] = TRUSTED {
tokpos ¬ index;
RETURN [TRUE];
};
ErrorContext: PUBLIC SAFE PROC [message: ROPE, tokenIndex: INT, put: IO.STREAM] = TRUSTED {
low: INT ¬ tokenIndex - 40;
high: INT ¬ tokenIndex + 40;
nErrors ¬ nErrors + 1;
IF low < 0 THEN low ¬ 0;
IF high >= rf.Size[] THEN high ¬ rf.Size[]-1;
put.PutChar['\n];
IF low > 0 THEN put.PutRope["..."];
FOR i: INT IN [low..high] DO
c: CHAR ¬ rf.Fetch[i];
IF i = tokenIndex THEN put.PutRope[" *^* "];
put.PutChar[c];
ENDLOOP;
IF high < rf.Size[]-1 THEN put.PutRope["..."];
put.PutChar['\n];
put.PutRope[message];
put.PutChar['\n];
};
END.