MPScanner.mesa
Copyright Ó 1985, 1986, 1987, 1990, 1991 by Xerox Corporation. All rights reserved.
Russ Atkinson (RRA) May 13, 1987 10:45:07 pm PDT
Last tweaked by Mike Spreitzer on December 2, 1987 4:17:27 pm PST
JKF August 4, 1989 9:44:53 am PDT
Michael Plass, March 22, 1993 4:48 pm PST
DIRECTORY
Convert,
IO,
MPLeaves USING [HTIndex, HTNode, LTIndex, LTNode],
MPP1,
MPParseTable,
Real USING [RealException],
Rope USING [Fetch, Flatten, Length, ROPE, Size];
MPScanner:
PROGRAM
IMPORTS Convert, IO, MPParseTable, Real, Rope
EXPORTS MPP1
SHARES Rope
= BEGIN OPEN MPLeaves, MPParseTable, MPP1;
ROPE: TYPE = Rope.ROPE;
STREAM: TYPE = IO.STREAM;
stuff supporting the scanner
hashTab: HashTableRef;
scanTab: ScanTableRef ¬ NIL;
vocab: VocabularyRef ¬ NIL;
vocabIndex: IndexTableRef ¬ NIL;
<<
hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry ¬ NIL;
scanTab: LONG POINTER TO ARRAY CHAR [40C..177C] OF TSymbol ¬ NIL;
vocab: LONG STRING ¬ NIL;
vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL ¬ NIL; >>
rf: ROPE ¬ NIL; -- the source
rs: STREAM ¬ NIL; -- the source as stream
toklen: NAT ¬ 0; -- current token length
tokpos: INT ¬ 0; -- source index for start of token
nTokens: CARDINAL ¬ 0; -- token count
nErrors: CARDINAL; -- lexical errors
lastToken: INT ¬ 0;
IdFromRope:
PROC [r:
ROPE, index:
INT]
RETURNS [HTIndex] = {
RETURN [NEW[HTNode ¬ [index: index, name: r]]]};
IdFirst: HTIndex ¬ IdFromRope["first", LAST[INT]];
IDLock: HTIndex ¬ IdFromRope["LOCK", LAST[INT]];
IDRest: HTIndex ¬ IdFromRope["rest", LAST[INT]];
IdOfFirst: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IdFirst]};
IdOfLock: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDLock]};
IdOfRest:
PUBLIC
SAFE
PROC
RETURNS [HTIndex] =
TRUSTED {
RETURN [IDRest]};
Atom:
PUBLIC
SAFE
PROC [errPut:
IO.
STREAM]
RETURNS [token:
MPP1.Token ¬
MPP1.nullToken] =
TRUSTED {
tokenKind: IO.TokenKind;
rope: ROPE;
charsSkipped: INT;
DO
peek: CHAR ¬ 0C;
[] ¬ IO.SkipWhitespace[rs, TRUE ! IO.EndOfStream => EXIT];
peek ¬ IO.PeekChar[rs ! IO.EndOfStream => EXIT];
tokpos ¬ IO.GetIndex[rs];
token.index ¬ tokpos;
SELECT peek
FROM
'%, '& => {
special case of identifier, don't let IO.GetCedarTokenRope see it!
DO
[] ¬ IO.GetChar[rs];
peek ¬ IO.PeekChar[rs ! IO.EndOfStream => EXIT];
SELECT peek
FROM
IN ['a..'z], IN ['A..'Z], IN ['0..'9], '%, '& => {};
ENDCASE => EXIT;
ENDLOOP;
toklen ¬ IO.GetIndex[rs] - tokpos;
token.class ¬ tokenID;
token.value.r ¬ IdFromRope[rf.Flatten[tokpos, toklen], tokpos];
EXIT;
};
ENDCASE;
[tokenKind, rope, charsSkipped] ¬
IO.GetCedarTokenRope[rs
!
IO.Error => {
ErrorContext["Syntax error", IO.GetIndex[rs], errPut]; EXIT};
IO.EndOfStream => {
ErrorContext["Unexpected end of stream", IO.GetIndex[rs], errPut]; EXIT
tokenKind ¬ tokenEOF;
rope ¬ "";
CONTINUE;
}
];
toklen ¬ rope.Length[];
tokpos ¬ rs.GetIndex[] - toklen;
SELECT tokenKind
FROM
tokenID => {
an identifier or reserved word
allcaps: BOOL ¬ TRUE;
r: ROPE ¬ NIL;
token.class ¬ tokenID;
FOR i:
INT
IN [0..rope.Size[])
DO
IF rope.Fetch[i] IN ['a..'z] THEN {allcaps ¬ FALSE; EXIT};
ENDLOOP;
IF allcaps
THEN {
This could be a reserved word...
first: CARDINAL ¬ ORD[rope.Fetch[0]];
last: CARDINAL ¬ ORD[rope.Fetch[rope.Size[]-1]];
h: CARDINAL ¬ (first * 128 - first + last) MOD LAST[HashIndex] + 1;
j: CARDINAL ¬ 0;
len: NAT ¬ rope.Size[];
WHILE (j ¬ hashTab[h].symbol) # 0
DO
s2: CARDINAL ¬ vocabIndex[j - 1];
IF vocabIndex[j] - s2 = len
THEN
FOR s1:
CARDINAL
IN [0..len)
DO
IF rope.Fetch[s1] # vocab[s2] THEN EXIT;
s2 ¬ s2 + 1;
REPEAT
FINISHED => {token.class ¬ j; GO TO CheckEnd};
ENDLOOP;
IF (h ¬ hashTab[h].link) = 0 THEN EXIT
ENDLOOP;
};
token.value.r ¬ IdFromRope[rope, tokpos];
};
tokenDECIMAL, tokenOCTAL, tokenHEX => {
an OCTAL literal
ENABLE Convert.Error => GO TO badNumber;
token.class ¬ tokenLNUM;
token.value.r ¬ WrapLit[NEW[DINT ¬ Convert.DIntFromRope[rope]]];
};
tokenREAL => {
a REAL literal
ENABLE Convert.Error, Real.RealException => TRUSTED{GO TO badNumber};
token.class ¬ tokenFLNUM;
token.value.r ¬ WrapLit[NEW[REAL ¬ Convert.RealFromLiteral[rope]]];
};
tokenROPE => {
a ROPE literal
token.class ¬ tokenSTR;
token.value.r ¬ WrapLit[NEW[ROPE ¬ Convert.RopeFromLiteral[rope]]];
};
tokenCHAR => {
a CHAR literal
token.class ¬ tokenCHAR;
token.value.r ¬ WrapLit[NEW[CHAR ¬ Convert.CharFromLiteral[rope]]];
};
tokenATOM => {
an ATOM literal
token.class ¬ tokenATOM;
token.value.r ¬ WrapLit[NEW[ATOM ¬ Convert.AtomFromRope[rope]]];
};
tokenSINGLE => {
a single-character token
XeroxOtherDollar: CHAR = 244C; -- ¤
XeroxLeftArrow: CHAR = 254C; -- ¬
XeroxUpArrow: CHAR = 255C; --
XeroxMultiply: CHAR = 264C; -- ´
XeroxDivide: CHAR = 270C; -- ¸
c: CHAR ¬ rf.Fetch[tokpos];
SELECT c
FROM
XeroxOtherDollar => c ¬ '$;
XeroxLeftArrow => c ¬ '←
XeroxUpArrow => c ¬ '^;
XeroxMultiply => c ¬ '*;
XeroxDivide => c ¬ '/;
ENDCASE;
IF c = ':
AND
NOT
IO.EndOf[rs]
AND
IO.PeekChar[rs]='=
THEN {
Should really come through as a tokenDOUBLE; ought to change IOScanImpl.
[] ¬ IO.GetChar[rs];
rope ¬ ":=";
toklen ¬ 2;
token.class ¬ scanTab['←];
}
ELSE { token.class ¬ scanTab[c] };
};
tokenDOUBLE => {
a double-character token
c1: CHAR ¬ rf.Fetch[tokpos];
SELECT c1
FROM
'* => token.class ¬ tokenPOWER; -- for **
'= => token.class ¬ tokenARROW; -- for ==
'< => token.class ¬ tokenLE; -- for <=
'> => token.class ¬ tokenGE; -- for >=
'. => token.class ¬ tokenDOTS; -- for ..
': => token.class ¬ scanTab['←]; -- for :=
'~ =>
SELECT rf.Fetch[tokpos+1]
FROM
'= => token.class ¬ scanTab['#]; -- for ~=
'< => token.class ¬ tokenGE; -- for ~<
'> => token.class ¬ tokenLE; -- for ~>
ENDCASE => GO TO syntaxError;
ENDCASE => GO TO syntaxError;
};
tokenCOMMENT => {
a comment
LOOP;
};
tokenEOF => {
token.class ¬ endMarker;
token.value ¬ MPP1.nullValue;
};
tokenERROR =>
GO
TO syntaxError;
token.msg describes the scanning error
ENDCASE => ERROR; -- all cases should have been covered
EXIT;
REPEAT
syntaxError => {
ErrorContext["Syntax error", tokpos, errPut];
};
badNumber => {
ErrorContext["invalid number", tokpos, errPut];
};
CheckEnd => {};
ENDLOOP;
Every token return must come through here
nTokens ¬ nTokens + 1;
lastToken ¬ tokpos;
RETURN;
};
numerical conversion
WrapLit:
PROC [r:
REF
ANY]
RETURNS [LTIndex] = {
RETURN [NEW[LTNode ¬ [index: tokpos, value: r, literal: rf.Flatten[tokpos, toklen]]]]};
initialization/finalization
ScanInit:
PUBLIC
SAFE
PROC [source:
ROPE]
RETURNS [vIndex: IndexTableRef, vBody: VocabularyRef] =
TRUSTED {
IF vocab =
NIL
THEN {
scanTab ¬ MPParseTable.InitScanTable[];
hashTab ¬ MPParseTable.InitHashTable[];
vocabIndex ¬ MPParseTable.InitIndexTable[];
vocab ¬ MPParseTable.InitVocabulary[];
};
<<
hashTab ¬ @table[table.scanTable.hashTab];
scanTab ¬ @table[table.scanTable.scanTab];
vocab ¬ LOOPHOLE[@table[table.scanTable.vocabBody]];
vocabIndex ¬ @table[table.scanTable.vocabIndex];
>>
rf ¬ source;
rs ¬ IO.RIS[rf];
tokpos ¬ 0;
lastToken ¬ 0;
nTokens ¬ nErrors ¬ 0;
RETURN[vIndex, vBody];
};
ScanReset:
PUBLIC
SAFE
PROC
RETURNS [
CARDINAL,
CARDINAL] =
TRUSTED {
rf ¬ NIL;
rs ¬ NIL;
RETURN [nTokens, nErrors];
};
error handling
ResetScanIndex:
PUBLIC
SAFE
PROC [index:
INT]
RETURNS [success:
BOOL] =
TRUSTED {
tokpos ¬ index;
RETURN [TRUE];
};
ErrorContext:
PUBLIC
SAFE
PROC [message:
ROPE, tokenIndex:
INT, put:
IO.
STREAM] =
TRUSTED {
low: INT ¬ tokenIndex - 40;
high: INT ¬ tokenIndex + 40;
nErrors ¬ nErrors + 1;
IF low < 0 THEN low ¬ 0;
IF high >= rf.Size[] THEN high ¬ rf.Size[]-1;
put.PutChar['\n];
IF low > 0 THEN put.PutRope["..."];
FOR i:
INT
IN [low..high]
DO
c: CHAR ¬ rf.Fetch[i];
IF i = tokenIndex THEN put.PutRope[" *^* "];
put.PutChar[c];
ENDLOOP;
IF high < rf.Size[]-1 THEN put.PutRope["..."];
put.PutChar['\n];
put.PutRope[message];
put.PutChar['\n];
};