<> <> <> DIRECTORY PS, Ascii, Convert, IO, RefText; PSScannerImpl: CEDAR PROGRAM IMPORTS PS, Convert, IO, RefText EXPORTS PS ~ BEGIN OPEN PS; Class: TYPE ~ { newLine, -- CR, LF space, -- SP, TAB openParen, -- '( closeParen, -- ') openBracket, -- '[ closeBracket, -- '] openAngle, -- '< closeAngle, -- '> openBrace, -- '{ closeBrace, -- '} slash, -- '/ percent, -- '% dot, -- '. hash, -- '# sign, -- '+, '- digitOctal, -- IN ['0..'7] digitOther, -- IN ['0..'9] (other than digitOctal) letterE, -- 'E, 'e letterHex, -- IN ['A..'F], IN ['a..'f] (other than letterE) letterOther, -- IN ['A..'Z], IN ['a..'z] (other than letterE, letterHex) backslash, -- '\ other -- everything else }; WhiteSpace: TYPE ~ Class[newLine..space]; Special: TYPE ~ Class[openParen..percent]; Regular: TYPE ~ Class[dot..other]; DecimalDigit: TYPE ~ Class[digitOctal..digitOther]; -- ['0..'9] HexDigit: TYPE ~ Class[digitOctal..letterHex]; -- ['0..'9], ['A..'F], ['a..'f] RadixDigit: TYPE ~ Class[digitOctal..letterOther]; -- ['0..'9], ['A..'Z], ['a..'z] ClassArray: TYPE ~ PACKED ARRAY CHAR OF Class; InitClassArray: PROC RETURNS [array: REF ClassArray] ~ { array _ NEW [ClassArray]; FOR char: CHAR IN CHAR DO array[char] _ SELECT char FROM Ascii.CR, Ascii.LF => newLine, Ascii.SP, Ascii.TAB => space, '( => openParen, ') => closeParen, '[ => openBracket, '] => closeBracket, '< => openAngle, '> => closeAngle, '{ => openBrace, '} => closeBrace, '/ => slash, '% => percent, '. => dot, '# => hash, '+, '- => sign, IN ['0..'7] => digitOctal, IN ['0..'9] => digitOther, 'E, 'e => letterE, IN ['A..'F], IN ['a..'f] => letterHex, IN ['A..'Z], IN ['a..'z] => letterOther, '\\ => backslash, ENDCASE => other; ENDLOOP; }; classFromChar: REF ClassArray ~ InitClassArray[]; Digit: TYPE ~ [0..36); DigitArray: TYPE ~ PACKED ARRAY CHAR OF Digit; InitDigitArray: PROC RETURNS [array: REF DigitArray] ~ { array _ NEW [DigitArray]; FOR char: CHAR IN CHAR DO array[char] _ SELECT char FROM IN ['0..'9] => char-'0, IN ['A..'Z] => 10+(char-'A), IN ['a..'z] => 10+(char-'a), ENDCASE => 0; ENDLOOP; }; digit: REF DigitArray ~ InitDigitArray[]; EscapeArray: TYPE ~ PACKED ARRAY CHAR OF CHAR; InitEscapeArray: PROC RETURNS [array: REF EscapeArray] ~ { array _ NEW [EscapeArray]; FOR char: CHAR IN CHAR DO array[char] _ SELECT char FROM 'n => Ascii.LF, 'r => Ascii.CR, 't => Ascii.TAB, 'b => Ascii.BS, 'f => Ascii.FF, IN ['0..'7] => VAL[digit[char]], ENDCASE => char; ENDLOOP; }; escape: REF EscapeArray ~ InitEscapeArray[]; State: TYPE ~ { empty, -- only whitespace so far sign, -- + | - int0, -- digit | int0 digit a valid integer (without sign) int1, -- sign digit | int1 digit a valid integer (with sign) real0, -- . | sign . real1, -- ( int0 | int1 ) . | real0 digit | real1 digit a valid real (without exponent) real2, -- ( int0 | int1 | real1 ) ( e | E ) real3, -- real2 ( + | - ) real4, -- ( real2 | real3 ) digit | real4 digit a valid real (with exponent) radix0, -- int0 # radix1, -- radix0 rdigit | radix1 rdigit a valid radix number slash, -- / name, -- char | slash char | // | name char a sequence of regular chars string0, -- ( ... string1, -- ( ... \ string2, -- ( ... \d string3, -- ( ... \dd hex0, -- < ... even number of digits hex1, -- < ... odd number of digits comment, -- % ... string, -- ( ... ) hex, -- < ... > proc0, -- { proc1, -- } error -- syntax error }; Action: TYPE ~ { skip, -- ignore char putBack, -- return char to input source append, -- append char to text buffer noteSlash, -- note slash preceding name (ignore '/ and increment slash count) stringStart, -- start string literal (ignore '( and start with next char) stringChar, -- include char in string literal escapeStart, -- begin escape sequence in string literal with '\ escapeChar, -- translate escape char following '\ escapeDigit, -- include another octal digit in escape sequence hexDigit1, -- char is first hex digit hexDigit2, -- char is second hex digit, append byte hexFill -- use zero for missing second digit and append byte }; TransitionResult: TYPE ~ RECORD [action: Action, state: State, stop: BOOL _ FALSE]; Transition: PROC [state: State, class: Class] RETURNS [TransitionResult] ~ { SELECT state FROM empty => { IF class IN WhiteSpace THEN RETURN [[action: skip, state: empty]]; IF class IN Special THEN SELECT class FROM openBrace => RETURN [[action: skip, state: proc0, stop: TRUE]]; closeBrace => RETURN [[action: skip, state: proc1, stop: TRUE]]; openBracket, closeBracket => RETURN [[action: append, state: name, stop: TRUE]]; openParen => RETURN [[action: stringStart, state: string0]]; openAngle => RETURN [[action: skip, state: hex0]]; slash => RETURN [[action: noteSlash, state: slash]]; percent => RETURN [[action: skip, state: comment]]; ENDCASE => RETURN [[action: append, state: error, stop: TRUE]]; }; IN[sign..name] => IF state=slash AND class=slash THEN NULL ELSE { IF class IN WhiteSpace THEN RETURN [[action: skip, state: state, stop: TRUE]]; IF class IN Special THEN RETURN [[action: putBack, state: state, stop: TRUE]]; }; IN[hex0..hex1] => { IF class IN WhiteSpace THEN RETURN [[action: skip, state: state]]; }; ENDCASE; SELECT state FROM empty => SELECT class FROM sign => RETURN [[action: append, state: sign]]; dot => RETURN [[action: append, state: real0]]; IN DecimalDigit => RETURN [[action: append, state: int0]]; ENDCASE => RETURN [[action: append, state: name]]; sign => SELECT class FROM dot => RETURN [[action: append, state: real0]]; IN DecimalDigit => RETURN [[action: append, state: int1]]; ENDCASE => RETURN [[action: append, state: name]]; int0 => SELECT class FROM dot => RETURN [[action: append, state: real1]]; letterE => RETURN [[action: append, state: real2]]; hash => RETURN [[action: append, state: radix0]]; IN DecimalDigit => RETURN [[action: append, state: int0]]; ENDCASE => RETURN [[action: append, state: name]]; int1 => SELECT class FROM dot => RETURN [[action: append, state: real1]]; letterE => RETURN [[action: append, state: real2]]; IN DecimalDigit => RETURN [[action: append, state: int1]]; ENDCASE => RETURN [[action: append, state: name]]; real0 => SELECT class FROM IN DecimalDigit => RETURN [[action: append, state: real1]]; ENDCASE => RETURN [[action: append, state: name]]; real1 => SELECT class FROM letterE => RETURN [[action: append, state: real2]]; IN DecimalDigit => RETURN [[action: append, state: real1]]; ENDCASE => RETURN [[action: append, state: name]]; real2 => SELECT class FROM sign => RETURN [[action: append, state: real3]]; IN DecimalDigit => RETURN [[action: append, state: real4]]; ENDCASE => RETURN [[action: append, state: name]]; real3 => SELECT class FROM IN DecimalDigit => RETURN [[action: append, state: real4]]; ENDCASE => RETURN [[action: append, state: name]]; real4 => SELECT class FROM IN DecimalDigit => RETURN [[action: append, state: real4]]; ENDCASE => RETURN [[action: append, state: name]]; radix0 => SELECT class FROM IN RadixDigit => RETURN [[action: append, state: radix1]]; ENDCASE => RETURN [[action: append, state: name]]; radix1 => SELECT class FROM IN RadixDigit => RETURN [[action: append, state: radix1]]; ENDCASE => RETURN [[action: append, state: name]]; slash => SELECT class FROM slash => RETURN [[action: noteSlash, state: name]]; ENDCASE => RETURN [[action: append, state: name]]; name => RETURN [[action: append, state: name]]; string0 => SELECT class FROM closeParen => RETURN [[action: skip, state: string, stop: TRUE]]; backslash => RETURN [[action: escapeStart, state: string1]]; ENDCASE => RETURN [[action: stringChar, state: string0]]; string1 => SELECT class FROM newLine => RETURN [[action: skip, state: string0]]; digitOctal => RETURN [[action: escapeChar, state: string2]]; ENDCASE => RETURN [[action: escapeChar, state: string0]]; string2 => SELECT class FROM closeParen => RETURN [[action: skip, state: string, stop: TRUE]]; digitOctal => RETURN [[action: escapeDigit, state: string3]]; ENDCASE => RETURN [[action: stringChar, state: string0]]; string3 => SELECT class FROM digitOctal => RETURN [[action: escapeDigit, state: string0]]; ENDCASE => RETURN [[action: stringChar, state: string0]]; hex0 => SELECT class FROM closeAngle => RETURN [[action: skip, state: hex, stop: TRUE]]; IN HexDigit => RETURN [[action: hexDigit1, state: hex1]]; ENDCASE => RETURN [[action: skip, state: error, stop: TRUE]]; hex1 => SELECT class FROM closeAngle => RETURN [[action: hexFill, state: hex, stop: TRUE]]; IN HexDigit => RETURN [[action: hexDigit2, state: hex0]]; ENDCASE => RETURN [[action: skip, state: error, stop: TRUE]]; comment => SELECT class FROM newLine => RETURN [[action: skip, state: empty]]; ENDCASE => RETURN [[action: skip, state: comment]]; ENDCASE => RETURN [[action: skip, state: error, stop: TRUE]]; }; TransitionTable: TYPE ~ ARRAY State OF REF TransitionArray; TransitionArray: TYPE ~ ARRAY Class OF TransitionResult; InitTransitionTable: PROC RETURNS [table: REF TransitionTable] ~ { table _ NEW [TransitionTable]; FOR state: State IN State DO array: REF TransitionArray ~ NEW [TransitionArray]; FOR class: Class IN Class DO array[class] _ Transition[state, class]; ENDLOOP; table[state] _ array; ENDLOOP; }; transition: REF TransitionTable ~ InitTransitionTable[]; TokenInt: PROC [text: REF READONLY TEXT] RETURNS [Any] ~ { RETURN [AnyFromInt[Convert.IntFromRope[RefText.TrustTextAsRope[text] ! Convert.Error => GOTO Overflow]]]; EXITS Overflow => RETURN [TokenReal[text]]; }; TokenReal: PROC [text: REF READONLY TEXT] RETURNS [Any] ~ { RETURN [AnyFromReal[Convert.RealFromRope[RefText.TrustTextAsRope[text] ! Convert.Error => GOTO Overflow]]]; EXITS Overflow => RETURN [TokenName[text]]; }; RClass: TYPE ~ { digit, -- IN ['0..'9] sign, -- '+, '- dot, -- '. letterE, -- 'E, 'e other -- everything else }; RClassArray: TYPE ~ PACKED ARRAY CHAR OF RClass; InitRClassArray: PROC RETURNS [array: REF RClassArray] ~ { array _ NEW [RClassArray]; FOR char: CHAR IN CHAR DO array[char] _ SELECT char FROM IN ['0..'9] => digit, '+, '- => sign, '. => dot, 'E, 'e => letterE, ENDCASE => other; ENDLOOP; }; rclassFromChar: REF RClassArray ~ InitRClassArray[]; RState: TYPE ~ { empty, -- only whitespace so far sign, -- + | - int, -- ( empty | sign ) digit | int digit a valid integer real0, -- . | sign . real1, -- int . | real0 digit | real1 digit a valid real (without exponent) real2, -- ( int | real1 ) ( e | E ) real3, -- real2 ( + | - ) real4, -- ( real2 | real3 ) digit | real4 digit a valid real (with exponent) error -- syntax error }; <> <> <> <> <> <> <> <> <> <> < mNeg _ (char='-);>> < { d: [0..10) ~ char-'0;>> <> <> <> <5 THEN fr _ fr+1; }; -- round if 10th digit >5>> <<};>> < eNeg _ (char='-);>> < { d: [0..10) ~ char-'0;>> <> <> <> <<};>> <> < ERROR;>> <> <