<> <> <> <> <> <> DIRECTORY Convert, IO, MPLeaves USING [HTIndex, HTNode, LTIndex, LTNode], MPP1, MPParseTable, Real USING [RealException], Rope USING [Fetch, Flatten, Length, ROPE, Size]; MPScanner: PROGRAM IMPORTS Convert, IO, MPParseTable, Real, Rope EXPORTS MPP1 SHARES Rope = BEGIN OPEN MPLeaves, MPParseTable, MPP1; ROPE: TYPE = Rope.ROPE; STREAM: TYPE = IO.STREAM; <> hashTab: HashTableRef; scanTab: ScanTableRef ¬ NIL; vocab: VocabularyRef ¬ NIL; vocabIndex: IndexTableRef ¬ NIL; << hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry ¬ NIL; scanTab: LONG POINTER TO ARRAY CHAR [40C..177C] OF TSymbol ¬ NIL; vocab: LONG STRING ¬ NIL; vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL ¬ NIL; >> rf: ROPE ¬ NIL; -- the source rs: STREAM ¬ NIL; -- the source as stream toklen: NAT ¬ 0; -- current token length tokpos: INT ¬ 0; -- source index for start of token nTokens: CARDINAL ¬ 0; -- token count nErrors: CARDINAL; -- lexical errors lastToken: INT ¬ 0; IdFromRope: PROC [r: ROPE, index: INT] RETURNS [HTIndex] = { RETURN [NEW[HTNode ¬ [index: index, name: r]]]}; IdFirst: HTIndex ¬ IdFromRope["first", LAST[INT]]; IDLock: HTIndex ¬ IdFromRope["LOCK", LAST[INT]]; IDRest: HTIndex ¬ IdFromRope["rest", LAST[INT]]; IdOfFirst: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IdFirst]}; IdOfLock: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDLock]}; IdOfRest: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDRest]}; Atom: PUBLIC SAFE PROC [errPut: IO.STREAM] RETURNS [token: MPP1.Token ¬ MPP1.nullToken] = TRUSTED { tokenKind: IO.TokenKind; rope: ROPE; charsSkipped: INT; DO peek: CHAR ¬ 0C; [] ¬ IO.SkipWhitespace[rs, TRUE ! IO.EndOfStream => EXIT]; peek ¬ IO.PeekChar[rs ! IO.EndOfStream => EXIT]; tokpos ¬ IO.GetIndex[rs]; token.index ¬ tokpos; SELECT peek FROM '%, '& => { <> DO [] ¬ IO.GetChar[rs]; peek ¬ IO.PeekChar[rs ! IO.EndOfStream => EXIT]; SELECT peek FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9], '%, '& => {}; ENDCASE => EXIT; ENDLOOP; toklen ¬ IO.GetIndex[rs] - tokpos; token.class ¬ tokenID; token.value.r ¬ IdFromRope[rf.Flatten[tokpos, toklen], tokpos]; EXIT; }; ENDCASE; [tokenKind, rope, charsSkipped] ¬ IO.GetCedarTokenRope[rs ! IO.Error => { ErrorContext["Syntax error", IO.GetIndex[rs], errPut]; EXIT}; IO.EndOfStream => { <> tokenKind ¬ tokenEOF; rope ¬ ""; CONTINUE; } ]; toklen ¬ rope.Length[]; tokpos ¬ rs.GetIndex[] - toklen; SELECT tokenKind FROM tokenID => { <> allcaps: BOOL ¬ TRUE; r: ROPE ¬ NIL; token.class ¬ tokenID; FOR i: INT IN [0..rope.Size[]) DO IF rope.Fetch[i] IN ['a..'z] THEN {allcaps ¬ FALSE; EXIT}; ENDLOOP; IF allcaps THEN { <> first: CARDINAL ¬ ORD[rope.Fetch[0]]; last: CARDINAL ¬ ORD[rope.Fetch[rope.Size[]-1]]; h: CARDINAL ¬ (first * 128 - first + last) MOD LAST[HashIndex] + 1; j: CARDINAL ¬ 0; len: NAT ¬ rope.Size[]; WHILE (j ¬ hashTab[h].symbol) # 0 DO s2: CARDINAL ¬ vocabIndex[j - 1]; IF vocabIndex[j] - s2 = len THEN FOR s1: CARDINAL IN [0..len) DO IF rope.Fetch[s1] # vocab[s2] THEN EXIT; s2 ¬ s2 + 1; REPEAT FINISHED => {token.class ¬ j; GO TO CheckEnd}; ENDLOOP; IF (h ¬ hashTab[h].link) = 0 THEN EXIT ENDLOOP; }; token.value.r ¬ IdFromRope[rope, tokpos]; }; tokenDECIMAL, tokenOCTAL, tokenHEX => { <> ENABLE Convert.Error => GO TO badNumber; token.class ¬ tokenLNUM; token.value.r ¬ WrapLit[NEW[DINT ¬ Convert.DIntFromRope[rope]]]; }; tokenREAL => { <> ENABLE Convert.Error, Real.RealException => TRUSTED{GO TO badNumber}; token.class ¬ tokenFLNUM; token.value.r ¬ WrapLit[NEW[REAL ¬ Convert.RealFromLiteral[rope]]]; }; tokenROPE => { <> token.class ¬ tokenSTR; token.value.r ¬ WrapLit[NEW[ROPE ¬ Convert.RopeFromLiteral[rope]]]; }; tokenCHAR => { <> token.class ¬ tokenCHAR; token.value.r ¬ WrapLit[NEW[CHAR ¬ Convert.CharFromLiteral[rope]]]; }; tokenATOM => { <> token.class ¬ tokenATOM; token.value.r ¬ WrapLit[NEW[ATOM ¬ Convert.AtomFromRope[rope]]]; }; tokenSINGLE => { <> XeroxOtherDollar: CHAR = 244C; -- ¤ XeroxLeftArrow: CHAR = 254C; -- ¬ XeroxUpArrow: CHAR = 255C; -- ­ XeroxMultiply: CHAR = 264C; -- ´ XeroxDivide: CHAR = 270C; -- ¸ c: CHAR ¬ rf.Fetch[tokpos]; SELECT c FROM XeroxOtherDollar => c ¬ '$; XeroxLeftArrow => c ¬ '_; XeroxUpArrow => c ¬ '^; XeroxMultiply => c ¬ '*; XeroxDivide => c ¬ '/; ENDCASE; IF c = ': AND NOT IO.EndOf[rs] AND IO.PeekChar[rs]='= THEN { <> [] ¬ IO.GetChar[rs]; rope ¬ ":="; toklen ¬ 2; token.class ¬ scanTab['_]; } ELSE { token.class ¬ scanTab[c] }; }; tokenDOUBLE => { <> c1: CHAR ¬ rf.Fetch[tokpos]; SELECT c1 FROM '* => token.class ¬ tokenPOWER; -- for ** '= => token.class ¬ tokenARROW; -- for == '< => token.class ¬ tokenLE; -- for <= '> => token.class ¬ tokenGE; -- for >= '. => token.class ¬ tokenDOTS; -- for .. ': => token.class ¬ scanTab['_]; -- for := '~ => SELECT rf.Fetch[tokpos+1] FROM '= => token.class ¬ scanTab['#]; -- for ~= '< => token.class ¬ tokenGE; -- for ~< '> => token.class ¬ tokenLE; -- for ~> ENDCASE => GO TO syntaxError; ENDCASE => GO TO syntaxError; }; tokenCOMMENT => { <> LOOP; }; tokenEOF => { token.class ¬ endMarker; token.value ¬ MPP1.nullValue; }; tokenERROR => GO TO syntaxError; <> ENDCASE => ERROR; -- all cases should have been covered EXIT; REPEAT syntaxError => { ErrorContext["Syntax error", tokpos, errPut]; }; badNumber => { ErrorContext["invalid number", tokpos, errPut]; }; CheckEnd => {}; ENDLOOP; <> nTokens ¬ nTokens + 1; lastToken ¬ tokpos; RETURN; }; <> WrapLit: PROC [r: REF ANY] RETURNS [LTIndex] = { RETURN [NEW[LTNode ¬ [index: tokpos, value: r, literal: rf.Flatten[tokpos, toklen]]]]}; <> ScanInit: PUBLIC SAFE PROC [source: ROPE] RETURNS [vIndex: IndexTableRef, vBody: VocabularyRef] = TRUSTED { IF vocab = NIL THEN { scanTab ¬ MPParseTable.InitScanTable[]; hashTab ¬ MPParseTable.InitHashTable[]; vocabIndex ¬ MPParseTable.InitIndexTable[]; vocab ¬ MPParseTable.InitVocabulary[]; }; << hashTab ¬ @table[table.scanTable.hashTab]; scanTab ¬ @table[table.scanTable.scanTab]; vocab ¬ LOOPHOLE[@table[table.scanTable.vocabBody]]; vocabIndex ¬ @table[table.scanTable.vocabIndex]; >> rf ¬ source; rs ¬ IO.RIS[rf]; tokpos ¬ 0; lastToken ¬ 0; nTokens ¬ nErrors ¬ 0; RETURN[vIndex, vBody]; }; ScanReset: PUBLIC SAFE PROC RETURNS [CARDINAL, CARDINAL] = TRUSTED { rf ¬ NIL; rs ¬ NIL; RETURN [nTokens, nErrors]; }; <> ResetScanIndex: PUBLIC SAFE PROC [index: INT] RETURNS [success: BOOL] = TRUSTED { tokpos ¬ index; RETURN [TRUE]; }; ErrorContext: PUBLIC SAFE PROC [message: ROPE, tokenIndex: INT, put: IO.STREAM] = TRUSTED { low: INT ¬ tokenIndex - 40; high: INT ¬ tokenIndex + 40; nErrors ¬ nErrors + 1; IF low < 0 THEN low ¬ 0; IF high >= rf.Size[] THEN high ¬ rf.Size[]-1; put.PutChar['\n]; IF low > 0 THEN put.PutRope["..."]; FOR i: INT IN [low..high] DO c: CHAR ¬ rf.Fetch[i]; IF i = tokenIndex THEN put.PutRope[" *^* "]; put.PutChar[c]; ENDLOOP; IF high < rf.Size[]-1 THEN put.PutRope["..."]; put.PutChar['\n]; put.PutRope[message]; put.PutChar['\n]; }; END.