DIRECTORY Rope USING [ROPE], HerculesAlgebra USING [Se]; HerculesParseUnparse: DEFINITIONS = BEGIN OPEN Rope, Alg: HerculesAlgebra; Se: TYPE = Alg.Se; Syntax: TYPE = REF SyntaxRec; SyntaxRec: TYPE = RECORD [-- Lexical parameters: maxAtomLength: INTEGER, -- max length of identifiers. Default is 200 maxStringLength: INTEGER, -- max length of strings. Default is 200 chType: ARRAY CHAR OF CharType, -- described below twoCharOps: LIST OF TwoCharOp, -- list of two-character lexemes -- Parser/unparser parameters: defaultAtomProps: Props, -- for atoms and operators. defaultLitProps: Props -- for ROPEs, INTs, REALs ]; CharType: TYPE = {letter, digit, blank, quote, op, invalid, other}; -- Identifiers must start with letter and continue with letters or digits; -- blanks are ignored; quotes delimit strings; ops form one- or two-character -- operators; invalids are invalid in any context; others are valid only inside strings. TwoCharOp: TYPE = RECORD [chars: RECORD [CHAR, CHAR], atom: ATOM -- same two characters in atom form ]; Props: TYPE = RECORD [-- Pick at most one. openfix: BOOL _ FALSE, -- open parenthesis closefix: BOOL _ FALSE, -- close parenthesis matches: ATOM _ NIL, -- matching closefix/openfix for openfix/closefix. -- Tells whether the lexeme may/must/must not be applied. to things to its left -- Pick at least one (just postbreak if closefix). postfix: BOOL _ FALSE, -- must be applied to something to its left postarg: BOOL _ FALSE, -- something to its left must be applied to it postbreak: BOOL _ FALSE, -- should not operate on/be operated by things to its right postpower: BindingPower _ 0, -- binding power as postfix -- Tells whether the lexeme may/must/must not be applied. to things to its right -- Pick at least one (just prebreak if openfix). prefix: BOOL _ FALSE, -- must be applied to something to its right prearg: BOOL _ FALSE, -- something to its right must be applied to it prebreak: BOOL _ FALSE, -- should not operate on/be operated by things to its right prepower: BindingPower _ 0, -- binding power as prefix -- Unparser data: leftspace: BOOL _ FALSE, -- always put space before this operator, leftstrength: BindingPower _ 0, -- binding power as postfix/closefix for unparsing leftoffset: UnparseOffset _ 0, -- if breaking at left, indent by this many chars rightspace: BOOL _ FALSE, -- always put space after this operator rightstrength: BindingPower _ 0, -- binding power as as prefix/openfix for unparsing rightoffset: UnparseOffset _ 0, -- if breaking at right, indent by this many chars alpha: BOOL _ FALSE -- operator is alphanumeric ]; UnparseOffset: TYPE = INTEGER [-100..100]; -- left margin offset at line breaks when unparsing BindingPower: TYPE = INTEGER [0..1000]; -- operator binding power/breakpoint strength (unp.) NewSyntax: PROC RETURNS [syntax: Syntax]; -- creates a new Syntax table. -- The syntax.chType is initialized by SetDefaultCharTypes below. -- Maximum length of atoms and strings is set to 200 each -- Atoms and literals get the properties postarg+postbreak+prearg+prebreak -- meaning they may start/end expressions, and be used as arguments to prefix -- or postfix operators. SetCharType: PROC [syntax: Syntax, from, to: CHAR, chType: CharType]; -- Sets char type for all characters in [from..to] SetDefaultCharTypes: PROC [syntax: Syntax]; -- sets syntax.chType to the default type array, as follows: -- letters to letter; digits to digit; double quotes to quote; -- CR, SP, LF, FF, NUL, and TAB to blank; -- +-*/|\()[]{}<>=~@#$%&^_:;,.!? to op; -- and all others to other. EnterTwoCharOp: PROC [syntax: Syntax, c1, c2: CHAR]; -- makes c1c2 into a double character lexeme; e.g. -> and .. -- both chars must be of type op EnterAlias: PROC [syntax: Syntax, alias, standard: ATOM]; -- Defines standard as the preferred form of alias MapAlias: PROC [syntax: Syntax, atom: ATOM] RETURNS [standard: ATOM]; -- gets the preferred form of the given atom, if any; otherwise returns the same GetProps: PROC [lex: Se, syntax: Syntax] RETURNS [Props]; -- returns properties of atom or literal (explicitly defined or default) EnterAtomProps: PROC [syntax: Syntax, name: ATOM, props: Props]; -- Defines parse/unparse properties of an atomic lexeme (identifier or operator) missing: Se; -- marks missing operands or closefixs in erroneous parsed expressions Parse: PROC [rope: ROPE, syntax: Syntax] RETURNS [expr: Se, error: ROPE, rest: ROPE]; -- Parses the given rope into a maximal-length Juno expression. -- The symbolic expression thus obtained is returned in expr. -- The unused part of the rope is returned in rest, so it can be further parsed. -- The expression must begin with a lexeme having the postbreak -- property, and finish with one having the prebreak property. -- If unable to parse a prefix of the given rope as an expression with such -- properties, error is set to a non-null message. -- The (usually incomple) expression parsed so far is returned -- in expr, and the unparsed part is returned in rest. -- The atom missing will occur in expr as -- the only argument of prefix operators, or -- the second argument of infix operators, or -- the last argument of openfix operators (if the closefix was not found), or -- the whole expr, -- when the corresponding sub-expression was still unparsed by the time -- the error was found. In that case, the input can be reconstructed -- by Cat[Unparse[expr], rest]. Unparse: PROC [expr: Se, syntax: Syntax, leftMargin, rightMargin: INTEGER] RETURNS [Rope.ROPE]; SyntacticPredicate: TYPE = PROC [expr: Se] RETURNS [VerdictAndCulprit]; Verdict: TYPE = {Yes, No, OfCourseNot}; -- Yes == innocent VerdictAndCulprit: TYPE = RECORD [verdict: Verdict, culprit: REF ANY]; -- culprit relevant only if verdict is No or OfCourseNot; Or: PROC [aw1, aw2: VerdictAndCulprit] RETURNS [r: VerdictAndCulprit]; HasForm: PROC[expr: Se, op: ATOM, Arg1, Arg2: SyntacticPredicate _ NIL] RETURNS [VerdictAndCulprit]; END. 2 HerculesParseUnparse.mesa (ex Lexer + Parser + ParseTable + OldUnparser) Last edited by by Stolfi, February 22, 1984 8:33 am Adapted from Parser.mesa Coded by GNelson (?), September 9, 1982 5:57 pm Last Edited by: GNelson, January 10, 1983 5:50 pm Merged with some of Lexer.mesa Coded by by GNelson (?), September 9, 1982 4:13 pm Merged with OldUnparser.mesa Written by GNelson (?), September 9, 1982 5:44 pm Last Edited by: GNelson, October 11, 1983 9:23 pm Merged with ParseTable.mesa Coded by GNelson (?) September 6, 1982 12:25 am Last Edited by: GNelson, August 24, 1983 10:30 pm Merged with OldParseWindow.mesa Coded by GNelson (?) September 6, 1982 12:25 am Last Edited by: GNelson, October 11, 1983 9:29 pm The Parser/Unparser susbsystem of Juno is concerned with the translation between the external Juno language and the internal "symbolic Expression" (i.e., parse tree) representation. This interface declares the data structures and procedures used by the Juno Parser/Unparser subsystem. The exact syntax of the external language is determined mainly by (1) a "character type table" and an "operator list", that together determine how the the lexical analyzer breaks the input character stream into lexical items; (2) a "parse/unparse table", that gives the binding type and power of each lexical item, and some formatting hints for its unparsing; and (3) a static "syntax checker", a procedure that validates the resulting parse tree. These three items (and therefore the exact syntax of the external language) must be supplied by the client. This interfaces merely describes their structure, and provides tools for their construction, initialization, and usage. - - - - PARSE/UNPARSE TABLES Preferred equivalents may be specified one or more atoms (including one- or two-character operators). The lexer will automatically replace every scanned atom by its preferred form, if any, before passing it to the caller. This is to allow typing english names for operators that have no keys, such as "forall" or "and". - - - - PARSER Simple operator precedence parser for operators of the following binding types: infix, prefix, postfix, selfix (i.e., variables or literals), nullfix ('noise' lexemes to be ignored) openfix (like "begin" or left parenthesis), and closefix (like "end" or right parenthesis). - - - - UNPARSER Unparses a symbolic expression and produces a rope. - - - - SYNTAX CHECKING Checks if a symbolic expression is of a specified form. Edited on February 15 - 20, 1984 10:41 pm, by Stolfi Merged Parser + ParseTable + Lexer + OldUnparser Rewritten. ÊߘšœI™I™4šœ™Jšœ/™/Jšœ1™1—šœ™J™2—šœ™J™1J™1—šœ™J™0J™1—šœ Ïcœ™ J™0J™1———™Jšœ¶™¶šœ©™©Jšœ ™ JšœŠ™ŠJšœT™T—Jšœä™ä—JšÏk œžœžœžœ˜=Jšœž œ˜$Jšžœžœ˜'Jšœ˜šœ™JšœÏbœžœžœ ˜Jš#œ žœžœœžœ-œžœ)œ žœžœžœ œžœžœ !œœœœ˜ãJšœ žœ8Ðcr  œ   œ   ˜¼Jšœ žœžœ žœžœžœ žœ $œ˜|Jšeœžœžœœ žœžœœ žœžœœ žœžœ4œPœ3œ žœžœ,œ žœžœ/œžœžœ<œ!œQœ1œ žœžœ-œ žœžœ0œ žœžœ?œ œœžœžœ*œ#3œ"2œžœžœ(œ$5œ#3œ žœžœœ˜ú Jšœžœžœ 4˜`Jšœžœžœ 4˜]JšœÏn œžœžœ œ ¥˜‡Jšœ¡ œžœžœ2˜{Jš!œ¡œžœœ I         Ÿ  ˜—š œ¡œžœžœ=œ ˜—JšœÁ™Á—Jš œ¡ œžœ#žœ Ïr¢˜qJš œ¡œžœžœžœ žœR˜›Jšœ¡œžœžœ H˜…Jšœ¡œžœžœP˜”—šœ™Jšœ’™’JšœŸœF˜TJš*œ¡œžœžœžœžœžœ{¢œ/¢W  3  œa¢n '¢ ¢Ú¢šœ¢ ˜„ —šœ™J™4Jš œ¡œžœ5žœž œžœ˜b—J˜šœ™J™8Jš œ¡œžœžœ žœ˜IJšœ žœ˜;Jš œžœžœžœžœ9˜…Jšœ¡œžœžœ˜GJš œ¡œžœžœ#žœžœ˜g—Jšœžœ˜J™™4Jšœ0™0Jšœ ™ ——…—t'…