DIRECTORY Convert, IO, MPLeaves USING [HTIndex, HTNode, LTIndex, LTNode], MPP1, MPParseTable, Real USING [RealException], Rope USING [Fetch, Flatten, Length, ROPE, Size]; MPScanner: PROGRAM IMPORTS Convert, IO, MPParseTable, Real, Rope EXPORTS MPP1 SHARES Rope = BEGIN OPEN MPLeaves, MPParseTable, MPP1; ROPE: TYPE = Rope.ROPE; STREAM: TYPE = IO.STREAM; hashTab: HashTableRef; scanTab: ScanTableRef ¬ NIL; vocab: VocabularyRef ¬ NIL; vocabIndex: IndexTableRef ¬ NIL; << hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry ¬ NIL; scanTab: LONG POINTER TO ARRAY CHAR [40C..177C] OF TSymbol ¬ NIL; vocab: LONG STRING ¬ NIL; vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL ¬ NIL; >> rf: ROPE ¬ NIL; -- the source rs: STREAM ¬ NIL; -- the source as stream toklen: NAT ¬ 0; -- current token length tokpos: INT ¬ 0; -- source index for start of token nTokens: CARDINAL ¬ 0; -- token count nErrors: CARDINAL; -- lexical errors lastToken: INT ¬ 0; IdFromRope: PROC [r: ROPE, index: INT] RETURNS [HTIndex] = { RETURN [NEW[HTNode ¬ [index: index, name: r]]]}; IdFirst: HTIndex ¬ IdFromRope["first", LAST[INT]]; IDLock: HTIndex ¬ IdFromRope["LOCK", LAST[INT]]; IDRest: HTIndex ¬ IdFromRope["rest", LAST[INT]]; IdOfFirst: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IdFirst]}; IdOfLock: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDLock]}; IdOfRest: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDRest]}; Atom: PUBLIC SAFE PROC [errPut: IO.STREAM] RETURNS [token: MPP1.Token ¬ MPP1.nullToken] = TRUSTED { tokenKind: IO.TokenKind; rope: ROPE; charsSkipped: INT; DO peek: CHAR ¬ 0C; [] ¬ IO.SkipWhitespace[rs, TRUE ! IO.EndOfStream => EXIT]; peek ¬ IO.PeekChar[rs ! IO.EndOfStream => EXIT]; tokpos ¬ IO.GetIndex[rs]; token.index ¬ tokpos; SELECT peek FROM '%, '& => { DO [] ¬ IO.GetChar[rs]; peek ¬ IO.PeekChar[rs ! IO.EndOfStream => EXIT]; SELECT peek FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9], '%, '& => {}; ENDCASE => EXIT; ENDLOOP; toklen ¬ IO.GetIndex[rs] - tokpos; token.class ¬ tokenID; token.value.r ¬ IdFromRope[rf.Flatten[tokpos, toklen], tokpos]; EXIT; }; ENDCASE; [tokenKind, rope, charsSkipped] ¬ IO.GetCedarTokenRope[rs ! IO.Error => { ErrorContext["Syntax error", IO.GetIndex[rs], errPut]; EXIT}; IO.EndOfStream => { tokenKind ¬ tokenEOF; rope ¬ ""; CONTINUE; } ]; toklen ¬ rope.Length[]; tokpos ¬ rs.GetIndex[] - toklen; SELECT tokenKind FROM tokenID => { allcaps: BOOL ¬ TRUE; r: ROPE ¬ NIL; token.class ¬ tokenID; FOR i: INT IN [0..rope.Size[]) DO IF rope.Fetch[i] IN ['a..'z] THEN {allcaps ¬ FALSE; EXIT}; ENDLOOP; IF allcaps THEN { first: CARDINAL ¬ ORD[rope.Fetch[0]]; last: CARDINAL ¬ ORD[rope.Fetch[rope.Size[]-1]]; h: CARDINAL ¬ (first * 128 - first + last) MOD LAST[HashIndex] + 1; j: CARDINAL ¬ 0; len: NAT ¬ rope.Size[]; WHILE (j ¬ hashTab[h].symbol) # 0 DO s2: CARDINAL ¬ vocabIndex[j - 1]; IF vocabIndex[j] - s2 = len THEN FOR s1: CARDINAL IN [0..len) DO IF rope.Fetch[s1] # vocab[s2] THEN EXIT; s2 ¬ s2 + 1; REPEAT FINISHED => {token.class ¬ j; GO TO CheckEnd}; ENDLOOP; IF (h ¬ hashTab[h].link) = 0 THEN EXIT ENDLOOP; }; token.value.r ¬ IdFromRope[rope, tokpos]; }; tokenDECIMAL, tokenOCTAL, tokenHEX => { ENABLE Convert.Error => GO TO badNumber; token.class ¬ tokenLNUM; token.value.r ¬ WrapLit[NEW[DINT ¬ Convert.DIntFromRope[rope]]]; }; tokenREAL => { ENABLE Convert.Error, Real.RealException => TRUSTED{GO TO badNumber}; token.class ¬ tokenFLNUM; token.value.r ¬ WrapLit[NEW[REAL ¬ Convert.RealFromLiteral[rope]]]; }; tokenROPE => { token.class ¬ tokenSTR; token.value.r ¬ WrapLit[NEW[ROPE ¬ Convert.RopeFromLiteral[rope]]]; }; tokenCHAR => { token.class ¬ tokenCHAR; token.value.r ¬ WrapLit[NEW[CHAR ¬ Convert.CharFromLiteral[rope]]]; }; tokenATOM => { token.class ¬ tokenATOM; token.value.r ¬ WrapLit[NEW[ATOM ¬ Convert.AtomFromRope[rope]]]; }; tokenSINGLE => { XeroxOtherDollar: CHAR = 244C; -- € XeroxLeftArrow: CHAR = 254C; -- ¬ XeroxUpArrow: CHAR = 255C; -- ­ XeroxMultiply: CHAR = 264C; -- ΄ XeroxDivide: CHAR = 270C; -- Έ c: CHAR ¬ rf.Fetch[tokpos]; SELECT c FROM XeroxOtherDollar => c ¬ '$; XeroxLeftArrow => c ¬ '_; XeroxUpArrow => c ¬ '^; XeroxMultiply => c ¬ '*; XeroxDivide => c ¬ '/; ENDCASE; IF c = ': AND NOT IO.EndOf[rs] AND IO.PeekChar[rs]='= THEN { [] ¬ IO.GetChar[rs]; rope ¬ ":="; toklen ¬ 2; token.class ¬ scanTab['_]; } ELSE { token.class ¬ scanTab[c] }; }; tokenDOUBLE => { c1: CHAR ¬ rf.Fetch[tokpos]; SELECT c1 FROM '* => token.class ¬ tokenPOWER; -- for ** '= => token.class ¬ tokenARROW; -- for == '< => token.class ¬ tokenLE; -- for <= '> => token.class ¬ tokenGE; -- for >= '. => token.class ¬ tokenDOTS; -- for .. ': => token.class ¬ scanTab['_]; -- for := '~ => SELECT rf.Fetch[tokpos+1] FROM '= => token.class ¬ scanTab['#]; -- for ~= '< => token.class ¬ tokenGE; -- for ~< '> => token.class ¬ tokenLE; -- for ~> ENDCASE => GO TO syntaxError; ENDCASE => GO TO syntaxError; }; tokenCOMMENT => { LOOP; }; tokenEOF => { token.class ¬ endMarker; token.value ¬ MPP1.nullValue; }; tokenERROR => GO TO syntaxError; ENDCASE => ERROR; -- all cases should have been covered EXIT; REPEAT syntaxError => { ErrorContext["Syntax error", tokpos, errPut]; }; badNumber => { ErrorContext["invalid number", tokpos, errPut]; }; CheckEnd => {}; ENDLOOP; nTokens ¬ nTokens + 1; lastToken ¬ tokpos; RETURN; }; WrapLit: PROC [r: REF ANY] RETURNS [LTIndex] = { RETURN [NEW[LTNode ¬ [index: tokpos, value: r, literal: rf.Flatten[tokpos, toklen]]]]}; ScanInit: PUBLIC SAFE PROC [source: ROPE] RETURNS [vIndex: IndexTableRef, vBody: VocabularyRef] = TRUSTED { IF vocab = NIL THEN { scanTab ¬ MPParseTable.InitScanTable[]; hashTab ¬ MPParseTable.InitHashTable[]; vocabIndex ¬ MPParseTable.InitIndexTable[]; vocab ¬ MPParseTable.InitVocabulary[]; }; << hashTab ¬ @table[table.scanTable.hashTab]; scanTab ¬ @table[table.scanTable.scanTab]; vocab ¬ LOOPHOLE[@table[table.scanTable.vocabBody]]; vocabIndex ¬ @table[table.scanTable.vocabIndex]; >> rf ¬ source; rs ¬ IO.RIS[rf]; tokpos ¬ 0; lastToken ¬ 0; nTokens ¬ nErrors ¬ 0; RETURN[vIndex, vBody]; }; ScanReset: PUBLIC SAFE PROC RETURNS [CARDINAL, CARDINAL] = TRUSTED { rf ¬ NIL; rs ¬ NIL; RETURN [nTokens, nErrors]; }; ResetScanIndex: PUBLIC SAFE PROC [index: INT] RETURNS [success: BOOL] = TRUSTED { tokpos ¬ index; RETURN [TRUE]; }; ErrorContext: PUBLIC SAFE PROC [message: ROPE, tokenIndex: INT, put: IO.STREAM] = TRUSTED { low: INT ¬ tokenIndex - 40; high: INT ¬ tokenIndex + 40; nErrors ¬ nErrors + 1; IF low < 0 THEN low ¬ 0; IF high >= rf.Size[] THEN high ¬ rf.Size[]-1; put.PutChar['\n]; IF low > 0 THEN put.PutRope["..."]; FOR i: INT IN [low..high] DO c: CHAR ¬ rf.Fetch[i]; IF i = tokenIndex THEN put.PutRope[" *^* "]; put.PutChar[c]; ENDLOOP; IF high < rf.Size[]-1 THEN put.PutRope["..."]; put.PutChar['\n]; put.PutRope[message]; put.PutChar['\n]; }; END. vMPScanner.mesa Copyright Σ 1985, 1986, 1987, 1990, 1991 by Xerox Corporation. All rights reserved. Russ Atkinson (RRA) May 13, 1987 10:45:07 pm PDT Last tweaked by Mike Spreitzer on December 2, 1987 4:17:27 pm PST JKF August 4, 1989 9:44:53 am PDT Michael Plass, March 22, 1993 4:48 pm PST stuff supporting the scanner special case of identifier, don't let IO.GetCedarTokenRope see it! ErrorContext["Unexpected end of stream", IO.GetIndex[rs], errPut]; EXIT an identifier or reserved word This could be a reserved word... an OCTAL literal a REAL literal a ROPE literal a CHAR literal an ATOM literal a single-character token Should really come through as a tokenDOUBLE; ought to change IOScanImpl. a double-character token a comment token.msg describes the scanning error Every token return must come through here numerical conversion initialization/finalization error handling Κ <•NewlineDelimiter –(cedarcode) style˜codešœ™Kšœ ΟeœI™TK™0K™AK™!K™)—K˜šΟk ˜ Kšœ ˜ Kšžœ˜Kšœ žœ$˜2Kšžœ˜Kšœ ˜ Kšœžœ˜Kšœžœžœ˜0K˜—šΟn œž˜Kšžœ žœ˜-Kšžœž˜ Kšžœ˜ Kšœžœžœžœ˜*K˜Kšžœžœžœ˜Kšžœžœžœžœ˜K˜šœ™K˜Kšœžœ˜Kšœžœ˜Kšœžœ˜ K˜Kš œ žœžœžœžœ žœžœ˜BKšœ žœžœžœžœžœ žœ žœ˜BKšœžœžœžœ˜Kšœ žœžœžœžœ žœžœžœ˜?K˜KšœžœžœΟc ˜Kšœžœžœ ˜*K˜Kšœžœ  ˜-Kšœžœ  "˜6Kšœ žœ  ˜(Kšœ žœ ˜'Kšœ žœ˜K˜—š Ÿ œžœžœ žœžœ˜