-- File: MailParse.mesa
-- Last edited by Levin: 15-Jan-81  8:48:28

MailParse: DEFINITIONS =

  BEGIN

  -- General Note:  The organization of this interface is motivated by RFC 733,
  -- "Standard for the Format of Arpa Network Text Messages", November 21, 1977.
  -- Syntactic entities enclosed in meta-linguistic brackets (e.g., <atom>) are
  -- taken from this document (where they appear unbracketed), as is much of the
  -- general terminology.


  -- Types and Constants --

  ParseErrorCode: TYPE = {badFieldName, badFieldBody, truncated};
    -- The argument carried by the ParseError signal, identifying what went wrong.
    -- 'badFieldName' is raised by GetFieldName when the header field ends
    --    prematurely or illegal header characters are encountered.
    -- 'badFieldBody' is raised by GetFieldBody when the field body terminates
    --    before a CR is seen, and by ParseNameList when it detects a syntax error.
    -- 'truncated' is raised by GetFieldName and GetFieldBody when the string being
    --    collected overflows the string provided, but only if 'notifyTruncation'
    --    was TRUE in the call of InitializeParse.

  ParseHandle: TYPE[2];
    -- Instances of this type represent independent instances of the parser.  This
    -- permits multiple clients of this interface to be using the parser implementation
    -- at the same time.  A ParseHandle is obtained from InitializeParse, is passed to
    -- every other procedure of this interface, and becomes meaningless once it has
    -- been passed to FinalizeParse.
    -- NOTE(review): TYPE[2] appears to declare an opaque two-word type whose
    -- representation is supplied by the implementation; confirm against the
    -- implementation module.

  NameInfo: TYPE = RECORD [nesting: BracketType, hasTag: BOOLEAN, type: NameType];
    -- This type is used exclusively with the ParseNameList procedure, described
    -- below.  The client-supplied 'process' procedure may glean additional
    -- information about its string parameters from the NameInfo passed to it.
    -- 'nesting' describes the context of this name in the name list being parsed
    --    (see BracketType).  If it is 'group' or 'list', the procedure
    --    GetListOrGroupName may be called from the 'process' procedure to obtain
    --    the <phrase> that introduces the group or list.
    -- 'hasTag' indicates whether the name is preceded by a tag (or possibly a
    --    sequence of tags) of the form :<atom>: .  If 'hasTag' is TRUE, the
    --    procedure GetTag may be called from the 'process' procedure to obtain the
    --    <atom> (or, in the case of a sequence of tags, the <atom>s separated by
    --    spaces as in a NameType[multiAtom]).
    -- 'type' classifies the simple name (see NameType).

  NameType: TYPE = {normal, quotedString, multiAtom, file, publicDL, msgDL};
    -- The classification of a simple name, reported to ParseNameList's 'process'
    -- procedure in the 'type' field of NameInfo.
    -- 'normal' means that none of the subsequent cases apply.
    -- 'quotedString' means the entire simple name is a quoted string.
    -- 'multiAtom' means that the simple name consists of more than one atom (and may
    --    include quoted strings as constituent atoms).  Adjacent atoms are separated
    --    by a single space.
    -- 'file' means that the simple name begins with '@' and has no other known structure.
    -- 'publicDL' means that the simple name is a single atom ending in '↑'.
    -- 'msgDL' means that the simple name is a single atom ending in ':'.

  BracketType: TYPE = {none, group, list};
    -- The nesting context of a name, reported to ParseNameList's 'process'
    -- procedure in the 'nesting' field of NameInfo.
    -- 'none' means that the name does not appear in a nested context.
    -- 'group' means that the name appears in the context xxx: ... ;
    -- 'list' means that the name appears in the context xxx< ... >
    -- In the 'group' and 'list' cases, xxx is a <phrase>, which the 'process'
    -- procedure may obtain by calling GetListOrGroupName.

  maxFieldNameSize: CARDINAL = 30;
    -- This is a plausible maximum size for field names, but there is no guarantee
    -- that it will not be exceeded.  What happens when a field name does not fit in
    -- the string supplied to GetFieldName is governed by the 'notifyTruncation'
    -- parameter of InitializeParse; see the description of that procedure.

  maxRecipientLength: CARDINAL = 50;
    -- This is a reasonable bound on the total number of significant characters in a
    -- recipient name (excluding excess white space, comments, and the like).  It is
    -- not a hard limit: the length of the strings passed to ParseNameList's
    -- 'process' procedure may exceed this bound.

  endOfInput: CHARACTER = 203C;
    -- The client's 'next' procedure (see InitializeParse) should return this
    -- character when the end of the input is reached.  The parser may still call
    -- the client's 'backup' procedure after 'next' has returned endOfInput, and a
    -- subsequent call of 'next' should then produce endOfInput again.
    -- NOTE(review): 203C is presumably Mesa's octal character-literal notation;
    -- confirm against the language manual.

  endOfList: CHARACTER = 204C;
    -- This character may be used as an "invisible delimiter" that terminates a list
    -- of names.  It has no other effect.
    -- NOTE(review): presumably the client's 'next' procedure supplies it within the
    -- input read by ParseNameList; confirm against the implementation.


  -- Signals --

  ParseError: ERROR [code: ParseErrorCode];
    -- Raised by GetFieldName, GetFieldBody, and ParseNameList to report a parsing
    -- failure.  'code' identifies the failure; the descriptions of the individual
    -- procedures, below, state which codes each of them raises and when.


  -- Procedures --

  InitializeParse: PROCEDURE [next: PROCEDURE RETURNS [CHARACTER],
    backup: PROCEDURE, notifyTruncation: BOOLEAN ← FALSE] RETURNS [ParseHandle];
    -- Initializes the header parser, and returns a ParseHandle which is to be passed to
    -- all other procedures of this interface.
    -- 'next' supplies the input: subsequent invocations of GetFieldName,
    --    GetFieldBody, and ParseNameList will obtain their characters by calling it.
    --    It should return endOfInput when the end of the input is reached.
    -- 'backup' is occasionally called by the parser to "back up" the input
    --    sequence.  The client should be prepared to back up at least 10 characters.
    -- 'notifyTruncation' controls overflow reporting.  If it is TRUE, GetFieldName
    --    and GetFieldBody will raise ParseError[truncated] if the string they are
    --    collecting overflows the string provided.  (The signal is not raised until
    --    the entire field name or body has been scanned.)  If it is FALSE (the
    --    default), this signal is suppressed.  The descriptions of
    --    GetListOrGroupName and GetTag state that their handling of an overlong
    --    result also depends on this parameter.

  FinalizeParse: PROCEDURE [pH: ParseHandle];
    -- Finalizes the parser instance specified by 'pH'.  This procedure must be
    -- called when the client has finished parsing, whether parsing completed
    -- normally or was abandoned because some error occurred.  After calling this
    -- procedure, 'pH' is no longer meaningful and must not be reused.
    -- Note: FinalizeParse must not be called while a call to ParseNameList is
    -- pending (for the same ParseHandle), for example from within the 'process'
    -- procedure passed to ParseNameList.

  GetFieldName: PROCEDURE [pH: ParseHandle, fieldNameOut: STRING]
    RETURNS [found: BOOLEAN];
    -- GetFieldName presumes that 'next' (see InitializeParse) is positioned to read the
    -- first character of a field name and returns the field name, without the
    -- terminating colon, in 'fieldNameOut'.  GetFieldName leaves 'next' ready to
    -- return the first character following the colon (or, if the end of the message
    -- header has been reached, the character (if any) after the two CRs that normally
    -- terminate the header).
    -- Upon return, 'found' is FALSE if no field names remain in the header.
    -- If the field name is too long for 'fieldNameOut', the behavior of GetFieldName
    -- depends upon the 'notifyTruncation' parameter passed to InitializeParse
    -- (ParseError[truncated] is raised only if that parameter was TRUE).
    -- If the header field ends prematurely or illegal header characters are
    -- encountered, ParseError[badFieldName] is raised.

  GetFieldBody: PROCEDURE [
    pH: ParseHandle, fieldBodyOut: STRING, suppressWhiteSpace: BOOLEAN ← FALSE];
    -- The (remainder of the) current field body is read using 'next' (see
    -- InitializeParse) and is returned in 'fieldBodyOut'.  Upon return,
    -- 'fieldBodyOut' has no initial or terminal white space (blanks and tabs)
    -- and, if 'suppressWhiteSpace' is TRUE, each internal run of white space has
    -- been replaced by a single blank.  ArpaNet folding conventions are also observed.
    -- If the field body is too long for 'fieldBodyOut', the behavior of GetFieldBody
    -- depends upon the 'notifyTruncation' parameter passed to InitializeParse
    -- (ParseError[truncated] is raised only if that parameter was TRUE).
    -- If the field body terminates before a CR is seen, ParseError[badFieldBody] is
    -- raised.

  ParseNameList: PROCEDURE [
    pH: ParseHandle,
    process: PROCEDURE [STRING, STRING, STRING, NameInfo] RETURNS [BOOLEAN],
    write: PROCEDURE [CHARACTER] ← NIL, suppressWhiteSpace: BOOLEAN ← FALSE];
    -- ParseNameList expects to read characters using 'next' (see InitializeParse)
    -- for a structured field body consisting of a list of recipient names.  For each
    -- such name encountered, it will call 'process', passing it three string arguments
    -- that designate the simple name, registry, and Arpanet host.  The simple name is
    -- always non-empty.  If the registry and/or host is absent, a string of length
    -- zero (not NIL) is passed.  The string parameters are free from leading, trailing,
    -- and excess internal white space, and are guaranteed to be at least
    -- 'maxRecipientLength' characters in length.
    -- NOTE(review): since a string of length zero may be passed, this guarantee
    -- presumably concerns the capacity of the strings rather than their current
    -- length; confirm against the implementation.
    -- The 'process' routine has a fourth parameter which provides additional
    -- information about the name being supplied (see the description of the NameInfo
    -- type, above, for details).
    -- The registry and Arpanet host parameters (but not the simple name) may be
    -- changed by 'process' to alter the qualification of the simple name.  This
    -- operates as follows:  If 'process' returns TRUE, ParseNameList will output the
    -- complete name, with potentially altered qualification, by calling 'write'
    -- successively with each character.  ParseNameList will attempt to preserve the
    -- original format of the name as much as possible, including bracketing, comments,
    -- and white space, unless 'suppressWhiteSpace' is TRUE.  In the latter case,
    -- non-significant white space will be eliminated and significant white space
    -- (outside of quoted strings) will be replaced by a single space.  If 'process'
    -- returns FALSE or if 'write' is defaulted, no output occurs.  ParseNameList
    -- assumes responsibility for outputting appropriate separators (commas) and
    -- brackets, based on the values returned by successive invocations of 'process'.
    -- If any syntax errors are detected during parsing, ParseError[badFieldBody] is
    -- raised.  It is legitimate for the 'process' routine to raise a signal that
    -- causes ParseNameList to be unwound.

  GetListOrGroupName: PROCEDURE [pH: ParseHandle, name: STRING];
    -- This procedure can only reasonably be called from inside the 'process'
    -- procedure passed to ParseNameList, when the 'nesting' field of the NameInfo
    -- passed to 'process' is 'group' or 'list'.  The <phrase> that introduces the
    -- current list or group is read using 'next' (see InitializeParse) and is
    -- returned in 'name'.  Upon return, 'name' has no initial or terminal white
    -- space (blanks and tabs) and each internal run of white space has been
    -- replaced by a single blank.
    -- If the name is too long, the behavior of GetListOrGroupName depends upon the
    -- 'notifyTruncation' parameter passed to InitializeParse.
    -- If GetListOrGroupName is called at an inappropriate time (e.g., when
    -- NameInfo.nesting = none), the empty string is returned.

  GetTag: PROCEDURE [pH: ParseHandle, tag: STRING];
    -- This procedure can only reasonably be called from inside the 'process'
    -- procedure passed to ParseNameList, when the 'hasTag' field of the NameInfo
    -- passed to 'process' is TRUE.  The <atom> appearing within colons that
    -- introduces the current name is read using 'next' (see InitializeParse) and is
    -- returned in 'tag'.  (If a sequence of such atoms, each within colons,
    -- precedes the current name, GetTag constructs a <phrase> consisting of this
    -- sequence of atoms separated by spaces and returns it in 'tag'.)  Upon return,
    -- 'tag' has no initial or terminal white space (blanks and tabs).
    -- If the atom is too long, the behavior of GetTag depends upon the
    -- 'notifyTruncation' parameter passed to InitializeParse.
    -- If GetTag is called at an inappropriate time (e.g., when
    -- NameInfo.hasTag = FALSE), the empty string is returned.


  END.