<<>> <> <> <> <> <> <<>> DIRECTORY Ascii USING [CR, FF, LF, Lower, SP, TAB], Atom USING [GetPName, MakeAtom, MakeAtomFromChar, MakeAtomFromRefText], BigCardinals USING [BigAdd, BigCARD, BigFromCard, BigFromSmall, BigZero, BigMultiply, MultiplyByDigit, Zero], FS USING [StreamOpen], IO, RefText USING [AppendChar, ObtainScratch, ReleaseScratch, TrustTextAsRope], Rope, Scheme, SchemeExtras, SymTab USING [Create, Fetch, Ref, Store]; SchemeScannerImpl: CEDAR PROGRAM IMPORTS Ascii, Atom, BigCardinals, FS, IO, RefText, Rope, Scheme, SymTab EXPORTS SchemeExtras ~ BEGIN OPEN Scheme, SchemeExtras; ROPE: TYPE ~ Rope.ROPE; Failed: ERROR ~ CODE; -- used in ScanNumber, below. bigZero: BigCardinals.BigCARD ~ BigCardinals.BigFromSmall[0]; bigOne: BigCardinals.BigCARD ~ BigCardinals.BigFromSmall[1]; MultiplyByCARD: PROC [big: BigCardinals.BigCARD, small: CARD] RETURNS [BigCardinals.BigCARD] ~ { IF small = 0 OR BigCardinals.BigZero[big] THEN RETURN [bigZero]; IF small <= CARD16.LAST THEN RETURN [BigCardinals.MultiplyByDigit[big, small]]; RETURN [BigCardinals.BigMultiply[big, BigCardinals.BigFromCard[small]]]; }; Digits: TYPE ~ RECORD [small: CARD, bigScale: CARD, big: BigCardinals.BigCARD ¬ BigCardinals.Zero]; -- represents small+(bigScale*big) digitsZero: Digits ~ [small: 0, bigScale: 0, big: bigZero]; digitsOne: Digits ~ [small: 1, bigScale: 0, big: bigZero]; smallLimit: CARD ~ CARD.LAST/16; -- if bigger than this, can't fit another digit <> GetToken: PUBLIC PROC [stream: IO.STREAM, buffer: REF TEXT, flushComments: BOOL ¬ TRUE, buildValue: BOOL ¬ FALSE] RETURNS [tokenKind: TokenKind, token: REF TEXT, error: ScanningError ¬ none, value: Any ¬ unspecified] ~ { char: CHAR ¬ ' ; NextChar: PROC RETURNS [CHAR] ~ { token ¬ RefText.AppendChar[token, (char ¬ IO.GetChar[stream])]; char ¬ Ascii.Lower[char]; RETURN [char]; }; NextStringChar: PROC RETURNS [CHAR] ~ { <> token ¬ RefText.AppendChar[token, (char ¬ IO.GetChar[stream])]; RETURN [char]; }; NextCharSkip: PROC RETURNS [CHAR] ~ { <> char ¬ IO.GetChar[stream]; RETURN [char]; }; PutbackChar: PROC ~ { IO.Backup[stream, token[token.length ¬ token.length - 1]]; }; BadIdentifier: PROC ~ { <> DO SELECT NextChar[ ! IO.EndOfStream => EXIT] FROM Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", '; => { PutbackChar[]; EXIT; }; ENDCASE => NULL; ENDLOOP; tokenKind ¬ error; value ¬ StringFromRope[Rope.FromRefText[token]]; token ¬ "Illegally-formed identifier"; error ¬ badIdentifier; }; BadEOF: PROC ~ { tokenKind ¬ error; value ¬ StringFromRope[Rope.FromRefText[token]]; token ¬ "Unexpected end of file before end of token"; error ¬ earlyEOF; }; ScanNumber: PROC ~ { <> terminatedByEOF: BOOL ¬ FALSE; DO SELECT NextStringChar[ ! IO.EndOfStream => { terminatedByEOF ¬ TRUE; EXIT}] FROM Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", '; => { PutbackChar[]; EXIT; }; ENDCASE => NULL; ENDLOOP; BEGIN index: NAT ¬ 0; len: NAT ~ token.length; char: CHAR ¬ ' ; radix: CARDINAL ¬ 0; -- unknown Exactness: TYPE ~ {true, false, unknown}; exact: Exactness ¬ unknown; Next: PROC RETURNS [CHAR] ~ { IF index >= len THEN ERROR IO.EndOfStream[NIL]; char ¬ Ascii.Lower[token[index]]; index ¬ index + 1; RETURN [char]; }; Peek: PROC RETURNS [CHAR] ~ { IF index >= len THEN ERROR IO.EndOfStream[NIL]; RETURN [char ¬ Ascii.Lower[token[index]]]; }; Bump: PROC ~ INLINE { index ¬ index + 1; }; Fail: PROC [msg: REF TEXT] ~ { tokenKind ¬ error; error ¬ badNumber; value ¬ StringFromRope[Rope.FromRefText[token]]; token ¬ msg; ERROR Failed; }; Exact: PROC [value: Exactness] ~ { IF exact = unknown THEN exact ¬ value ELSE Fail["Too many exactness specifiers"]; }; Radix: PROC [value: CARDINAL] ~ { IF radix = 0 THEN radix ¬ value ELSE Fail["Too many radix specifiers"]; }; UReal: PROC [negative: BOOL] RETURNS [uReal: Any ¬ NIL] ~ { <> Which: TYPE ~ {num, denom}; val: ARRAY Which OF Digits ¬ [num: digitsZero, denom: digitsOne]; exponent: INT ¬ 0; exponentSign: INT ¬ 1; Fold: PROC [w: Which] ~ { val[w].big ¬ BigCardinals.BigAdd[ MultiplyByCARD[val[w].big, val[w].bigScale], BigCardinals.BigFromCard[val[w].small] ]; val[w].small ¬ 0; val[w].bigScale ¬ 1; }; GetVal: PROC [w: Which] RETURNS [BigCardinals.BigCARD] ~ INLINE { Fold[w]; RETURN [val[w].big] }; AccumDigit: PROC [w: Which, digit: CARDINAL] ~ INLINE { -- uses radix IF buildValue THEN { IF val[w].small > smallLimit OR val[w].bigScale > smallLimit THEN Fold[w]; val[w].small ¬ val[w].small*radix+digit; IF val[w].bigScale # 0 THEN val[w].bigScale ¬ val[w].bigScale*radix; }; }; AccumCheckedDigit: PROC [w: Which, digit: CARDINAL] ~ { IF digit >= radix THEN Fail[Rope.ToRefText[IO.PutFR["Illegal digit `%g' in radix %g numbers", [character[char]], [integer[radix]]]]] ELSE AccumDigit[w, digit] }; AccumAfterDotDigit: PROC [digit: CARDINAL] ~ { AccumDigit[num, digit]; AccumDigit[denom, 0]; }; EnsureDecimal: PROC ~ { EnsureInexact[]; IF radix # 10 THEN Fail[Rope.ToRefText[IO.PutFR1["The character `%g' may not appear in a decimal number", [character[char]]]]]; }; EnsureInexact: PROC ~ { IF exact = true THEN Fail[Rope.ToRefText[IO.PutFR1["The character `%g' may not appear in an exact number", [character[char]]]]] ELSE exact ¬ false; }; State: TYPE ~ {start, inNum, inNumHashes, initialDot, inDecimal, inDecimalHashes, afterExpMarker, afterExpSign, inExp, afterSlash, inDenom, inDenomHashes}; <> state: State ¬ start; << [Artwork node; type 'Artwork on' to command tool] >> BEGIN -- for exits ENABLE IO.EndOfStream => SELECT state FROM start, initialDot, afterSlash, afterExpMarker => GO TO BadEOFReturn; ENDCASE => GO TO OKReturn; SELECT Next[] FROM IN ['0..'9] => { AccumCheckedDigit[num, char - '0]; state ¬ inNum; }; IN ['a..'f] => { AccumCheckedDigit[num, char - 'a + 10]; state ¬ inNum; }; '. => { EnsureDecimal[]; state ¬ initialDot; }; ENDCASE => GO TO BadCharReturn; DO -- finite state machine main loop SELECT state FROM inNum => SELECT Peek[] FROM IN ['0..'9] => { Bump[]; AccumCheckedDigit[num, char - '0]; }; IN ['a..'c] => { Bump[]; AccumCheckedDigit[num, char - 'a + 10]; }; 'd, 'e, 'f => { Bump[]; IF radix = 16 THEN AccumDigit[num, char - 'a + 10] ELSE state ¬ afterExpMarker; }; 's, 'l => { Bump[]; state ¬ afterExpMarker; }; '. => { Bump[]; EnsureDecimal[]; state ¬ inDecimal; }; '/ => { Bump[]; state ¬ afterSlash; }; '# => { Bump[]; EnsureInexact[]; AccumDigit[num, 0]; state ¬ inNumHashes; }; ENDCASE => GO TO OKReturn; inNumHashes => SELECT Peek[] FROM '# => { Bump[]; AccumDigit[num, 0]; }; '/ => { Bump[]; state ¬ afterSlash; }; '. => { Bump[]; EnsureDecimal[]; state ¬ inDecimalHashes; }; 'e, 's, 'f, 'd, 'l => { Bump[]; state ¬ afterExpMarker; }; ENDCASE => GO TO OKReturn; initialDot => IF Next[] IN ['0..'9] THEN { AccumAfterDotDigit[char - '0]; state ¬ inDecimal; } ELSE GO TO BadCharReturn; inDecimal => SELECT Peek[] FROM IN ['0..'9] => { Bump[]; AccumAfterDotDigit[char - '0]; }; 'e, 's, 'f, 'd, 'l => { Bump[]; state ¬ afterExpMarker; }; '# => { Bump[]; AccumAfterDotDigit[0]; state ¬ inDecimalHashes; }; ENDCASE => GO TO OKReturn; inDecimalHashes => SELECT Peek[] FROM '# => { Bump[]; <> }; 'e, 's, 'f, 'd, 'l => { Bump[]; state ¬ afterExpMarker; }; ENDCASE => GO TO OKReturn; afterExpMarker => { EnsureDecimal[]; SELECT Next[] FROM '+ => state ¬ afterExpSign; '- => { exponentSign ¬ -1; state ¬ afterExpSign; }; IN ['0..'9] => { exponent ¬ char - '0; state ¬ inExp; }; ENDCASE => GO TO BadCharReturn; }; afterExpSign => IF Next[] IN ['0..'9] THEN { exponent ¬ exponent * 10 + char - '0; state ¬ inExp; } ELSE GO TO BadCharReturn; inExp => IF Peek[] IN ['0..'9] THEN { Bump[]; exponent ¬ exponent * 10 + char - '0; } ELSE GO TO OKReturn; afterSlash => { val[denom] ¬ digitsZero; SELECT Next[] FROM IN ['0..'9] => { AccumCheckedDigit[denom, char - '0]; state ¬ inDenom; }; IN ['a..'f] => { AccumCheckedDigit[denom, char - 'a + 10]; state ¬ inDenom; }; ENDCASE => GO TO BadCharReturn; }; inDenom => SELECT Peek[] FROM IN ['0..'9] => { Bump[]; AccumCheckedDigit[denom, char - '0]; }; IN ['a..'f] => { Bump[]; AccumCheckedDigit[denom, char - 'a + 10]; }; '# => { Bump[]; EnsureInexact[]; AccumDigit[denom, 0]; state ¬ inDenomHashes; }; ENDCASE => GO TO OKReturn; inDenomHashes => IF Peek[] = '# THEN { Bump[]; AccumDigit[denom, 0]; } ELSE GO TO OKReturn; ENDCASE => ERROR; ENDLOOP; EXITS BadCharReturn => Fail[Rope.ToRefText[IO.PutFR1["Unexpected character `%g' in number", [character[char]]]]]; BadEOFReturn => { IF terminatedByEOF THEN BadEOF[] ELSE { tokenKind ¬ error; error ¬ badNumber; value ¬ StringFromRope[Rope.FromRefText[token]]; token ¬ "Numeric token ended prematurely"; }; ERROR Failed; }; OKReturn => { IF buildValue THEN { IF exact # false AND exponent = 0 AND val[num].bigScale = 0 AND val[denom].bigScale = 0 AND val[denom].small = 1 AND val[num].small <= CARD[INT.LAST] THEN { <> uReal ¬ MakeFixnum[ IF negative THEN -INT[val[num].small] ELSE INT[val[num].small] ]; } ELSE IF exact = false AND exponent = 0 AND val[num].bigScale = 0 AND val[denom].bigScale = 0 AND val[num].small <= 1000000 AND val[denom].small <= 1000000 THEN { <> float: REAL ¬ REAL[INT[val[num].small]] / REAL[INT[val[denom].small]]; IF negative THEN float ¬ -float; uReal ¬ NEW[NumberRep.flonum ¬ [FALSE, flonum[float]]]; } ELSE uReal ¬ MakeReal[ negative: negative, numerator: GetVal[num], denominator: GetVal[denom], exponent: exponent * exponentSign, radix: 10, -- just for the exponent now exact: exact # false]; }; RETURN; }; END; }; MakeInteger: PROC [i: INT] RETURNS [Any] ~ { <> IF exact # false THEN RETURN [MakeFixnum[i]] ELSE RETURN [NEW[NumberRep.flonum ¬ [FALSE, flonum[REAL[i]]]]]; }; tokenKind ¬ number; BEGIN -- for exits ENABLE { Failed => GO TO Return; IO.EndOfStream => GO TO BadEOFReturn; }; <> WHILE Peek[] = '# DO Bump[]; SELECT Next[] FROM 'b => Radix[2]; 'd => Radix[8]; 'o => Radix[10]; 'x => Radix[16]; 'w => Radix[16]; 'e => Exact[true]; 'i => Exact[false]; ENDCASE => Fail["Illegal #-specifier in number"]; ENDLOOP; IF radix = 0 THEN radix ¬ 10; <> SELECT Peek[] FROM '+, '- => { negative: BOOL ~ (char = '-); Bump[]; IF Peek[] = 'i THEN { Bump[]; IF buildValue THEN value ¬ MakeRectangular[MakeInteger[0], MakeInteger[IF negative THEN -1 ELSE 1]]; GO TO EndCheck; }; value ¬ UReal[negative]; IF Peek[! IO.EndOfStream => GO TO Return] = 'i THEN { Bump[]; IF buildValue THEN value ¬ MakeRectangular[MakeInteger[0], value]; GO TO EndCheck; }; }; ENDCASE => value ¬ UReal[FALSE]; <> SELECT Peek[! IO.EndOfStream => GO TO Return] FROM '@ => { magnitude: Any ¬ value; Bump[]; SELECT Peek[] FROM '+, '- => { Bump[]; value ¬ UReal[char = '-]; }; ENDCASE => value ¬ UReal[FALSE]; IF buildValue THEN value ¬ MakePolar[magnitude, value]; GO TO EndCheck; }; '+, '- => { negative: BOOL ~ (char = '-); realPart: Any ¬ value; Bump[]; IF Peek[] = 'i THEN { -- this is the `R+i' or `R-i' case Bump[]; IF buildValue THEN value ¬ MakeRectangular[value, MakeInteger[IF negative THEN -1 ELSE 1]]; GO TO EndCheck; }; value ¬ UReal[negative]; IF Next[] # 'i THEN Fail["Complex number does not end in `i'"]; IF buildValue THEN value ¬ MakeRectangular[realPart, value]; GO TO EndCheck; }; ENDCASE => GO TO EndCheck; <> EXITS Return => RETURN; EndCheck => { IF index # len THEN { tokenKind ¬ error; error ¬ badNumber; value ¬ StringFromRope[Rope.FromRefText[token]]; token ¬ "Numeric token contains extra characters"; }; RETURN; }; BadEOFReturn => { IF terminatedByEOF THEN BadEOF[] ELSE { tokenKind ¬ error; error ¬ badNumber; value ¬ StringFromRope[Rope.FromRefText[token]]; token ¬ "Numeric token ended prematurely"; }; RETURN; }; END; END; }; ScanIdentifier: PROC [allowTail: BOOL] ~ { SELECT NextChar[ ! IO.EndOfStream => GO TO BadEOFReturn] FROM '# => { IF NextChar[ ! IO.EndOfStream => GO TO BadEOFReturn] # '" THEN { PutbackChar[]; BadIdentifier[]; RETURN; } ELSE { ScanString[identifier]; RETURN; }; }; '+, '-, '., IN ['0..'9] => { <> BadIdentifier[]; RETURN; }; ENDCASE => { IF NOT identifierConstituent[char] THEN { PutbackChar[]; BadIdentifier[]; RETURN; } ELSE { buffer: REF TEXT ¬ IF buildValue THEN RefText.ObtainScratch[100] ELSE NIL; AddChar: PROC ~ { IF buildValue THEN buffer ¬ RefText.AppendChar[buffer, char]; }; AddChar[]; DO SELECT NextChar[ ! IO.EndOfStream => EXIT] FROM Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", '; => { PutbackChar[]; EXIT; }; '# => { IF allowTail THEN { PutbackChar[]; EXIT; } ELSE { BadIdentifier[]; RETURN; } }; ENDCASE => IF identifierConstituent[char] THEN AddChar[] ELSE { BadIdentifier[]; RETURN; }; ENDLOOP; tokenKind ¬ identifier; IF buildValue THEN { value ¬ Atom.MakeAtomFromRefText[buffer]; RefText.ReleaseScratch[buffer]; }; }; }; EXITS BadEOFReturn => { BadEOF[]; RETURN; }; }; ScanIdentifierTail: PROC ~ { <> IF IO.PeekChar[stream ! IO.EndOfStream => GO TO Return] = '# THEN { interfaceName: Any ~ value; [] ¬ NextChar[]; -- skip the hash mark ScanIdentifier[FALSE]; IF tokenKind = identifier THEN { -- as opposed to `error' tokenKind ¬ moduleReference; IF buildValue THEN value ¬ Cons[$access, Cons[interfaceName, Cons[value, NIL]]]; }; }; EXITS Return => RETURN; }; ScanString: PROC [nonErrorKind: TokenKind] ~ { buffer: REF TEXT ¬ IF buildValue THEN RefText.ObtainScratch[100] ELSE NIL; AddChar: PROC [c: CHAR] ~ { IF buildValue THEN buffer ¬ RefText.AppendChar[buffer, c]; }; tokenKind ¬ nonErrorKind; DO SELECT NextStringChar[ ! IO.EndOfStream => EXIT] FROM '" => { IF buildValue THEN { SELECT nonErrorKind FROM string => value ¬ StringFromRope[Rope.FromRefText[buffer]]; identifier => value ¬ Atom.MakeAtomFromRefText[buffer]; ENDCASE => ERROR; RefText.ReleaseScratch[buffer]; }; RETURN; }; '\\ => { SELECT NextChar[ ! IO.EndOfStream => EXIT] FROM 'b => AddChar['\b]; 'f, 'p => AddChar['\f]; 'l => AddChar['\l]; 'n => AddChar['\n]; 'r => AddChar['\r]; 't => AddChar['\t]; '", '\\ => AddChar[char]; IN ['0..'3] => { ascii: [0 .. 377B] ¬ ORD[char] - ORD['0]; FOR i: INT IN [3..4] DO SELECT NextChar[ ! IO.EndOfStream => EXIT] FROM IN ['0..'7] => ascii ¬ ascii * 10B + ORD[char] - ORD['0]; ENDCASE => { tokenKind ¬ error; error ¬ unknownStringEscape; value ¬ StringFromRope[Rope.FromRefText[s: token, start: token.length - i, len: i]]; token ¬ "Unknown escape sequence in string"; RETURN; }; ENDLOOP; AddChar[VAL[ascii]]; }; ENDCASE => { tokenKind ¬ error; error ¬ unknownStringEscape; value ¬ StringFromRope[Rope.FromRefText[s: token, start: token.length - 2, len: 2]]; token ¬ "Unknown escape sequence in string"; RETURN; }; }; ENDCASE => AddChar[char]; ENDLOOP; <> BadEOF[]; RETURN; }; token ¬ buffer; BEGIN -- for exits DO -- until we get a real token token.length ¬ 0; SELECT NextChar[ ! IO.EndOfStream => GO TO EOFReturn] FROM Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF => LOOP; '; => { tokenKind ¬ comment; IF flushComments THEN { DO SELECT NextCharSkip[ ! IO.EndOfStream => GO TO EOFReturn] FROM Ascii.CR, Ascii.LF => EXIT; ENDCASE; ENDLOOP; } ELSE DO SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM Ascii.CR, Ascii.LF => GO TO PutbackReturn; <> ENDCASE; ENDLOOP; }; '( => { tokenKind ¬ openParenthesis; RETURN}; ') => { tokenKind ¬ closeParenthesis; RETURN}; '' => { tokenKind ¬ quote; RETURN}; '` => { tokenKind ¬ quasiquote; RETURN}; ', => { tokenKind ¬ unquote; IF NextChar[ ! IO.EndOfStream => GO TO Return] = '@ THEN { tokenKind ¬ unquoteSplicing; RETURN; } ELSE GO TO PutbackReturn; }; IN ['0 .. '9] => { ScanNumber[]; RETURN}; '. => { -- either dot or the peculiar identifier "..." or a number. tokenKind ¬ dot; SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", '; => GO TO PutbackReturn; IN ['0 .. '9] => { ScanNumber[]; RETURN}; '. => { SELECT NextChar[ ! IO.EndOfStream => GO TO BadEOFReturn] FROM Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", ' => { PutbackChar[]; BadIdentifier[]; RETURN}; '. => NULL; ENDCASE => { BadIdentifier[]; RETURN}; tokenKind ¬ identifier; IF buildValue THEN value ¬ Atom.MakeAtom["..."]; <> SELECT IO.PeekChar[stream ! IO.EndOfStream => GO TO Return] FROM Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", '; => RETURN; '# => { ScanIdentifierTail[]; RETURN}; ENDCASE => { BadIdentifier[]; RETURN}; }; ENDCASE => { BadIdentifier[]; RETURN}; }; '+, '- => { -- either a peculiar identifier ("+" or "-") or a number. tokenKind ¬ identifier; IF buildValue THEN value ¬ Atom.MakeAtomFromChar[char]; SELECT IO.PeekChar[stream ! IO.EndOfStream => GO TO Return] FROM Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", '; => RETURN; '# => { ScanIdentifierTail[]; RETURN}; ENDCASE => { ScanNumber[]; RETURN}; }; '#=> { SELECT NextChar[ ! IO.EndOfStream => GO TO BadEOFReturn] FROM 't, 'f => { tokenKind ¬ boolean; IF buildValue THEN value ¬ IF char = 't THEN true ELSE false; RETURN}; 'b, 'o, 'd, 'x, 'w, 'i, 'e => { ScanNumber[]; RETURN}; '\\ => { found: BOOL; buffer: REF TEXT ¬ RefText.ObtainScratch[10]; DO SELECT NextStringChar[ ! IO.EndOfStream => IF buffer.length = 0 THEN GO TO BadEOFReturn ELSE EXIT] FROM Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", '; => { IF buffer.length = 0 THEN buffer ¬ RefText.AppendChar[buffer, char] ELSE PutbackChar[]; EXIT; }; ENDCASE => buffer ¬ RefText.AppendChar[buffer, char]; ENDLOOP; tokenKind ¬ character; <> IF buffer.length = 1 THEN { IF buildValue THEN value ¬ MakeChar[buffer[0]]; RETURN; }; [found, value] ¬ SymTab.Fetch[x: charForName, key: RefText.TrustTextAsRope[buffer]]; RefText.ReleaseScratch[buffer]; IF NOT found THEN { tokenKind ¬ error; error ¬ unknownCharacterName; value ¬ StringFromRope[Rope.FromRefText[token]]; token ¬ "Unknown named character" }; RETURN}; '( => { tokenKind ¬ openVector; RETURN}; '" => { ScanString[identifier]; IF tokenKind = identifier THEN -- as opposed to `error' ScanIdentifierTail[]; RETURN}; '! => { found: BOOL; buffer: REF TEXT ¬ RefText.ObtainScratch[10]; DO SELECT NextStringChar[ ! IO.EndOfStream => IF buffer.length = 0 THEN GO TO BadEOFReturn ELSE EXIT] FROM Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF, Ascii.FF, '(, '), '", '; => { IF buffer.length = 0 THEN buffer ¬ RefText.AppendChar[buffer, char] ELSE PutbackChar[]; EXIT; }; ENDCASE => buffer ¬ RefText.AppendChar[buffer, char]; ENDLOOP; tokenKind ¬ character; [found, value] ¬ SymTab.Fetch[x: primitiveSyntaxForName, key: RefText.TrustTextAsRope[buffer]]; RefText.ReleaseScratch[buffer]; IF NOT found THEN { tokenKind ¬ error; error ¬ unknownPrimitiveSyntax; value ¬ StringFromRope[Rope.FromRefText[token]]; token ¬ "Unknown primitive syntax marker" }; RETURN}; ENDCASE => { tokenKind ¬ error; error ¬ unknownHashDispatch; value ¬ StringFromRope[Rope.FromRefText[token]]; token ¬ "Unknown # token"; RETURN}; }; '"=> { ScanString[string]; RETURN}; ENDCASE => { PutbackChar[]; ScanIdentifier[TRUE]; IF tokenKind = identifier THEN ScanIdentifierTail[]; RETURN}; ENDLOOP; EXITS Return => { RETURN }; PutbackReturn => { PutbackChar[]; RETURN}; BadEOFReturn => { BadEOF[]; RETURN}; EOFReturn => { tokenKind ¬ endOfFile; RETURN}; END; }; primitiveSyntaxForName: SymTab.Ref ~ InitPrimitiveSyntax[]; InitPrimitiveSyntax: PROC RETURNS [table: SymTab.Ref] ~ { table ¬ SymTab.Create[case: FALSE]; FOR ps: PrimitiveSyntaxRep IN PrimitiveSyntaxRep DO [] ¬ SymTab.Store[x: table, key: Atom.GetPName[symbolForPrimitiveSyntaxRep[ps]], val: primitiveSyntaxForPrimitiveSyntaxRep[ps]]; ENDLOOP; }; charForName: SymTab.Ref ~ SymTab.Create[case: FALSE]; nameForChar: REF ARRAY CHAR OF ROPE ~ InitCharNames[]; InitCharNames: PROC RETURNS [REF ARRAY CHAR OF ROPE] ~ { a: REF ARRAY CHAR OF ROPE ¬ NEW[ARRAY CHAR OF ROPE]; AddName: PROC [char: CHAR, name: ROPE] ~ { [] ¬ SymTab.Store[x: charForName, key: name, val: MakeChar[char]]; a[char] ¬ name; }; <> FOR c: CHAR IN CHAR DO AddName[c, Rope.Flatten[IO.PutFR1["%03b", [integer[ORD[c]]]]]]; ENDLOOP; AddName['(, Rope.Flatten["OpenPar"]]; AddName['(, Rope.Flatten["OpenParen"]]; AddName['), Rope.Flatten["ClosePar"]]; AddName['), Rope.Flatten["CloseParen"]]; FOR c: CHAR IN (' ..'~] DO AddName[c, Rope.FromChar[c]]; ENDLOOP; AddName['\r, Rope.Flatten["Return"]]; AddName['\l, Rope.Flatten["LineFeed"]]; AddName['\n, Rope.Flatten["Newline"]]; AddName['\t, Rope.Flatten["Tab"]]; AddName[' , Rope.Flatten["Space"]]; AddName['\f, Rope.Flatten["FormFeed"]]; AddName['\f, Rope.Flatten["Page"]]; RETURN [a] }; identifierConstituent: REF ARRAY CHAR OF BOOL ~ InitIdentifierConstituent[]; InitIdentifierConstituent: PROC RETURNS [a: REF ARRAY CHAR OF BOOL] ~ { a ¬ NEW[ARRAY CHAR OF BOOL ¬ ALL[FALSE]]; FOR c: CHAR IN ['0..'9] DO a[c] ¬ TRUE; ENDLOOP; FOR c: CHAR IN ['A..'Z] DO a[c] ¬ TRUE; ENDLOOP; FOR c: CHAR IN ['a..'z] DO a[c] ¬ TRUE; ENDLOOP; a['!] ¬ TRUE; a['$] ¬ TRUE; a['%] ¬ TRUE; a['&] ¬ TRUE; a['*] ¬ TRUE; a['/] ¬ TRUE; a[':] ¬ TRUE; a['<] ¬ TRUE; a['=] ¬ TRUE; a['>] ¬ TRUE; a['?] ¬ TRUE; a['~] ¬ TRUE; a['_] ¬ TRUE; a['^] ¬ TRUE; a['.] ¬ TRUE; a['+] ¬ TRUE; a['-] ¬ TRUE; }; <> Test: PROC [name: ROPE, out: IO.STREAM] ~ { in: IO.STREAM ~ FS.StreamOpen[name]; buffer: REF TEXT ¬ NEW[TEXT[10]]; token: REF TEXT ¬ NEW[TEXT[10]]; tokenKind: TokenKind; error: ScanningError; value: Any; DO [tokenKind, token, error, value] ¬ GetToken[in, buffer, FALSE, TRUE]; SELECT tokenKind FROM identifier, moduleReference, boolean, number, character, string, primitiveSyntax => IO.PutF[out, "%g: %g\n", [refAny[NEW[TokenKind ¬ tokenKind]]], [refAny[value]]]; endOfFile => EXIT; error => IO.PutF[out, "**Error (%g)** %g\n\t%g\n", [refAny[NEW[ScanningError ¬ error]]], [text[token]], [refAny[value]]]; comment => IO.PutF1[out, "%g\n", [text[token]]]; ENDCASE => IO.PutF[out, "%g: %g\n", [refAny[NEW[TokenKind ¬ tokenKind]]], [text[token]]]; ENDLOOP; IO.Close[in]; }; <<>> END.