[_CDCSL_93-16_]<1>Cedar>release>MesaParser>MPScanner.mesa

MPScanner.mesa

Russ Atkinson (RRA) May 13, 1987 10:45:07 pm PDT

Last tweaked by Mike Spreitzer on December 2, 1987 4:17:27 pm PST

JKF August 4, 1989 9:44:53 am PDT

Michael Plass, March 22, 1993 4:48 pm PST

DIRECTORY

Convert,

IO,

MPLeaves USING [HTIndex, HTNode, LTIndex, LTNode],

MPP1,

MPParseTable,

Real USING [RealException],

Rope USING [Fetch, Flatten, Length, ROPE, Size];

MPScanner: PROGRAM

IMPORTS Convert, IO, MPParseTable, Real, Rope

EXPORTS MPP1

SHARES Rope

= BEGIN OPEN MPLeaves, MPParseTable, MPP1;

ROPE: TYPE = Rope.ROPE;

STREAM: TYPE = IO.STREAM;

stuff supporting the scanner

hashTab: HashTableRef;

scanTab: ScanTableRef ¬ NIL;

vocab: VocabularyRef ¬ NIL;

vocabIndex: IndexTableRef ¬ NIL;

<<

hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry ¬ NIL;

scanTab: LONG POINTER TO ARRAY CHAR [40C..177C] OF TSymbol ¬ NIL;

vocab: LONG STRING ¬ NIL;

vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL ¬ NIL; >>

rf: ROPE ¬ NIL; -- the source

rs: STREAM ¬ NIL; -- the source as stream

toklen: NAT ¬ 0; -- current token length

tokpos: INT ¬ 0; -- source index for start of token

nTokens: CARDINAL ¬ 0; -- token count

nErrors: CARDINAL; -- lexical errors

lastToken: INT ¬ 0;

IdFromRope: PROC [r: ROPE, index: INT] RETURNS [HTIndex] = {

RETURN [NEW[HTNode ¬ [index: index, name: r]]]};

IdFirst: HTIndex ¬ IdFromRope["first", LAST[INT]];

IDLock: HTIndex ¬ IdFromRope["LOCK", LAST[INT]];

IDRest: HTIndex ¬ IdFromRope["rest", LAST[INT]];

IdOfFirst: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IdFirst]};

IdOfLock: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDLock]};

IdOfRest: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDRest]};

Atom: PUBLIC SAFE PROC [errPut: IO.STREAM] RETURNS [token: MPP1.Token ¬ MPP1.nullToken] = TRUSTED {

tokenKind: IO.TokenKind;

rope: ROPE;

charsSkipped: INT;

DO

peek: CHAR ¬ 0C;

[] ¬ IO.SkipWhitespace[rs, TRUE ! IO.EndOfStream => EXIT];

peek ¬ IO.PeekChar[rs ! IO.EndOfStream => EXIT];

tokpos ¬ IO.GetIndex[rs];

token.index ¬ tokpos;

SELECT peek FROM

'%, '& => {

special case of identifier, don't let IO.GetCedarTokenRope see it!

DO

[] ¬ IO.GetChar[rs];

peek ¬ IO.PeekChar[rs ! IO.EndOfStream => EXIT];

SELECT peek FROM

IN ['a..'z], IN ['A..'Z], IN ['0..'9], '%, '& => {};

ENDCASE => EXIT;

ENDLOOP;

toklen ¬ IO.GetIndex[rs] - tokpos;

token.class ¬ tokenID;

token.value.r ¬ IdFromRope[rf.Flatten[tokpos, toklen], tokpos];

EXIT;

};

ENDCASE;

[tokenKind, rope, charsSkipped] ¬ IO.GetCedarTokenRope[rs

!

IO.Error => {

ErrorContext["Syntax error", IO.GetIndex[rs], errPut]; EXIT};

IO.EndOfStream => {

ErrorContext["Unexpected end of stream", IO.GetIndex[rs], errPut]; EXIT

tokenKind ¬ tokenEOF;

rope ¬ "";

CONTINUE;

}

];

toklen ¬ rope.Length[];

tokpos ¬ rs.GetIndex[] - toklen;

SELECT tokenKind FROM

tokenID => {

an identifier or reserved word

allcaps: BOOL ¬ TRUE;

r: ROPE ¬ NIL;

token.class ¬ tokenID;

FOR i: INT IN [0..rope.Size[]) DO

IF rope.Fetch[i] IN ['a..'z] THEN {allcaps ¬ FALSE; EXIT};

ENDLOOP;

IF allcaps THEN {

This could be a reserved word...

first: CARDINAL ¬ ORD[rope.Fetch[0]];

last: CARDINAL ¬ ORD[rope.Fetch[rope.Size[]-1]];

h: CARDINAL ¬ (first * 128 - first + last) MOD LAST[HashIndex] + 1;

j: CARDINAL ¬ 0;

len: NAT ¬ rope.Size[];

WHILE (j ¬ hashTab[h].symbol) # 0 DO

s2: CARDINAL ¬ vocabIndex[j - 1];

IF vocabIndex[j] - s2 = len THEN

FOR s1: CARDINAL IN [0..len) DO

IF rope.Fetch[s1] # vocab[s2] THEN EXIT;

s2 ¬ s2 + 1;

REPEAT

FINISHED => {token.class ¬ j; GO TO CheckEnd};

ENDLOOP;

IF (h ¬ hashTab[h].link) = 0 THEN EXIT

ENDLOOP;

};

token.value.r ¬ IdFromRope[rope, tokpos];

};

tokenDECIMAL, tokenOCTAL, tokenHEX => {

an OCTAL literal

ENABLE Convert.Error => GO TO badNumber;

token.class ¬ tokenLNUM;

token.value.r ¬ WrapLit[NEW[DINT ¬ Convert.DIntFromRope[rope]]];

};

tokenREAL => {

a REAL literal

ENABLE Convert.Error, Real.RealException => TRUSTED{GO TO badNumber};

token.class ¬ tokenFLNUM;

token.value.r ¬ WrapLit[NEW[REAL ¬ Convert.RealFromLiteral[rope]]];

};

tokenROPE => {

a ROPE literal

token.class ¬ tokenSTR;

token.value.r ¬ WrapLit[NEW[ROPE ¬ Convert.RopeFromLiteral[rope]]];

};

tokenCHAR => {

a CHAR literal

token.class ¬ tokenCHAR;

token.value.r ¬ WrapLit[NEW[CHAR ¬ Convert.CharFromLiteral[rope]]];

};

tokenATOM => {

an ATOM literal

token.class ¬ tokenATOM;

token.value.r ¬ WrapLit[NEW[ATOM ¬ Convert.AtomFromRope[rope]]];

};

tokenSINGLE => {

a single-character token

XeroxOtherDollar: CHAR = 244C; -- ¤

XeroxLeftArrow: CHAR = 254C; -- ¬

XeroxUpArrow: CHAR = 255C; --

XeroxMultiply: CHAR = 264C; -- ´

XeroxDivide: CHAR = 270C; -- ¸

c: CHAR ¬ rf.Fetch[tokpos];

SELECT c FROM

XeroxOtherDollar => c ¬ '$;

XeroxLeftArrow => c ¬ '←

XeroxUpArrow => c ¬ '^;

XeroxMultiply => c ¬ '*;

XeroxDivide => c ¬ '/;

ENDCASE;

IF c = ': AND NOT IO.EndOf[rs] AND IO.PeekChar[rs]='=

THEN {

Should really come through as a tokenDOUBLE; ought to change IOScanImpl.

[] ¬ IO.GetChar[rs];

rope ¬ ":=";

toklen ¬ 2;

token.class ¬ scanTab['←];

}

ELSE { token.class ¬ scanTab[c] };

};

tokenDOUBLE => {

a double-character token

c1: CHAR ¬ rf.Fetch[tokpos];

SELECT c1 FROM

'* => token.class ¬ tokenPOWER; -- for **

'= => token.class ¬ tokenARROW; -- for ==

'< => token.class ¬ tokenLE; -- for <=

'> => token.class ¬ tokenGE; -- for >=

'. => token.class ¬ tokenDOTS; -- for ..

': => token.class ¬ scanTab['←]; -- for :=

'~ => SELECT rf.Fetch[tokpos+1] FROM

'= => token.class ¬ scanTab['#]; -- for ~=

'< => token.class ¬ tokenGE; -- for ~<

'> => token.class ¬ tokenLE; -- for ~>

ENDCASE => GO TO syntaxError;

};

tokenCOMMENT => {

a comment

LOOP;

};

tokenEOF => {

token.class ¬ endMarker;

token.value ¬ MPP1.nullValue;

};

tokenERROR => GO TO syntaxError;

token.msg describes the scanning error

ENDCASE => ERROR; -- all cases should have been covered

EXIT;

REPEAT

syntaxError => {

ErrorContext["Syntax error", tokpos, errPut];

};

badNumber => {

ErrorContext["invalid number", tokpos, errPut];

};

CheckEnd => {};

ENDLOOP;

Every token return must come through here

nTokens ¬ nTokens + 1;

lastToken ¬ tokpos;

RETURN;

};

numerical conversion

WrapLit: PROC [r: REF ANY] RETURNS [LTIndex] = {

RETURN [NEW[LTNode ¬ [index: tokpos, value: r, literal: rf.Flatten[tokpos, toklen]]]]};

initialization/finalization

ScanInit: PUBLIC SAFE PROC [source: ROPE] RETURNS [vIndex: IndexTableRef, vBody: VocabularyRef] = TRUSTED {

IF vocab = NIL THEN {

scanTab ¬ MPParseTable.InitScanTable[];

hashTab ¬ MPParseTable.InitHashTable[];

vocabIndex ¬ MPParseTable.InitIndexTable[];

vocab ¬ MPParseTable.InitVocabulary[];

};

<<

hashTab ¬ @table[table.scanTable.hashTab];

scanTab ¬ @table[table.scanTable.scanTab];

vocab ¬ LOOPHOLE[@table[table.scanTable.vocabBody]];

vocabIndex ¬ @table[table.scanTable.vocabIndex];

>>

rf ¬ source;

rs ¬ IO.RIS[rf];

tokpos ¬ 0;

lastToken ¬ 0;

nTokens ¬ nErrors ¬ 0;

RETURN[vIndex, vBody];

};

ScanReset: PUBLIC SAFE PROC RETURNS [CARDINAL, CARDINAL] = TRUSTED {

rf ¬ NIL;

rs ¬ NIL;

RETURN [nTokens, nErrors];

};

error handling

ResetScanIndex: PUBLIC SAFE PROC [index: INT] RETURNS [success: BOOL] = TRUSTED {

tokpos ¬ index;

RETURN [TRUE];

};

ErrorContext: PUBLIC SAFE PROC [message: ROPE, tokenIndex: INT, put: IO.STREAM] = TRUSTED {

low: INT ¬ tokenIndex - 40;

high: INT ¬ tokenIndex + 40;

nErrors ¬ nErrors + 1;

IF low < 0 THEN low ¬ 0;

IF high >= rf.Size[] THEN high ¬ rf.Size[]-1;

put.PutChar['\n];

IF low > 0 THEN put.PutRope["..."];

FOR i: INT IN [low..high] DO

c: CHAR ¬ rf.Fetch[i];

IF i = tokenIndex THEN put.PutRope[" *^* "];

put.PutChar[c];

ENDLOOP;

IF high < rf.Size[]-1 THEN put.PutRope["..."];

put.PutChar['\n];

put.PutRope[message];

put.PutChar['\n];

};

END.