File [Indigo]<Grapevine>MailParser>MailParse.mesa!1

-- File: MailParse.mesa
-- Last edited by Levin: 15-Jan-81 8:48:28

MailParse: DEFINITIONS =

BEGIN

-- General Note: The organization of this interface is motivated by RFC 733,
-- "Standard for the Format of Arpa Network Text Messages", November 21, 1977.
-- Syntactic entities enclosed in meta-linguistic brackets (e.g., <atom>) are
-- taken from this document (where they appear unbracketed), as is much of the
-- general terminology.

-- Types and Constants --

ParseErrorCode: TYPE = {badFieldName, badFieldBody, truncated};
-- The codes carried by the ParseError signal declared below.
-- 'badFieldName' is raised by GetFieldName if the header field ends prematurely
-- or illegal header characters are encountered.
-- 'badFieldBody' is raised by GetFieldBody if the field body terminates before a
-- CR is seen, and by ParseNameList if a syntax error is detected.
-- 'truncated' is raised when a string being collected overflows the string
-- provided by the client, and only if 'notifyTruncation' was TRUE in the call
-- of InitializeParse (see the description of that procedure).

ParseHandle: TYPE[2];
-- Instances of this type represent independent instances of the parser. This
-- permits multiple clients of this interface to use the parser implementation
-- at the same time. A ParseHandle is obtained from InitializeParse, is passed
-- to every other procedure of this interface, and is released by FinalizeParse.
-- Only the size of the type is visible here; its representation is not.

NameInfo: TYPE = RECORD [nesting: BracketType, hasTag: BOOLEAN, type: NameType];
-- This type is used exclusively with the ParseNameList procedure, described
-- below. The client-supplied 'process' procedure may glean additional
-- information about its parameters from the NameInfo passed to it.
-- The 'nesting' field describes the context of this name in the name list being
-- parsed. If it is 'group' or 'list', the procedure GetListOrGroupName may
-- be called from the 'process' procedure to obtain the name of the enclosing
-- group or list.
-- The 'hasTag' field indicates whether the name is preceded by a tag (or
-- possibly a sequence of tags) of the form :<atom>: . If 'hasTag' is TRUE, the
-- procedure GetTag may be called from the 'process' procedure to obtain the
-- <atom> (or, in the case of a sequence of tags, the <atom>s separated by
-- spaces as in a NameType[multiAtom]).
-- The 'type' field semantics are described under NameType, below.

NameType: TYPE = {normal, quotedString, multiAtom, file, publicDL, msgDL};
-- Classifies the simple name passed to ParseNameList's 'process' procedure (see
-- the 'type' field of NameInfo).
-- 'normal' means that none of the subsequent cases apply.
-- 'quotedString' means the entire simple name is a quoted string.
-- 'multiAtom' means that the simple name consists of more than one atom (and may
-- include quoted strings as constituent atoms). Adjacent atoms are separated
-- by a single space.
-- 'file' means that the simple name begins with '@' and has no other known structure.
-- 'publicDL' means that the simple name is a single atom ending in '↑'.
-- 'msgDL' means that the simple name is a single atom ending in ':'.

BracketType: TYPE = {none, group, list};
-- Describes the nesting context of a name (see the 'nesting' field of NameInfo).
-- 'none' means that the name does not appear in a nested context.
-- 'group' means that the name appears in the context xxx: ... ;
-- 'list' means that the name appears in the context xxx< ... >
-- In both nested cases, xxx is a <phrase>, which the 'process' procedure may
-- obtain by calling GetListOrGroupName.

maxFieldNameSize: CARDINAL = 30;
-- This is a plausible maximum size (in characters) for field names, but there is
-- no guarantee that it will not be exceeded. See the description of
-- InitializeParse for a discussion of field name truncation.

maxRecipientLength: CARDINAL = 50;
-- This is a reasonable bound on the total number of significant characters in a
-- recipient name (excluding excess white space, comments, and the like).
-- It is not a hard limit: the length of the strings passed to ParseNameList's
-- 'process' procedure may exceed this bound. See also the description of
-- ParseNameList.

endOfInput: CHARACTER = 203C;
-- This character should be returned by the client's 'next' procedure (see
-- InitializeParse) when the end of the input is reached. The client's 'backup'
-- procedure may be called after endOfInput has been returned by the 'next'
-- procedure; a subsequent call of 'next' should then produce endOfInput again.

endOfList: CHARACTER = 204C;
-- This character may be used as an "invisible delimiter" that terminates a list
-- of names. It has no other effect.

-- Signals --

ParseError: ERROR [code: ParseErrorCode];
-- Raised by the procedures below to report a parsing failure. The 'code'
-- argument identifies the kind of failure; see the individual procedure
-- descriptions for the circumstances under which each code is raised.

-- Procedures --

InitializeParse: PROCEDURE [next: PROCEDURE RETURNS [CHARACTER],
backup: PROCEDURE, notifyTruncation: BOOLEAN ← FALSE] RETURNS [ParseHandle];
-- Initializes the header parser and returns a ParseHandle, which is to be passed
-- to all other procedures of this interface.
-- 'next' supplies the input: subsequent invocations of GetFieldName,
-- GetFieldBody, and ParseNameList will obtain their characters by calling it.
-- It should return endOfInput when the end of the input is reached.
-- 'backup' is occasionally called by the parser to "back up" the input
-- sequence. The client should be prepared to back up at least 10 characters.
-- If 'notifyTruncation' is TRUE, GetFieldName and GetFieldBody will raise
-- ParseError[truncated] if the string they are collecting overflows the string
-- provided. (The signal is not raised until the entire field name or body has
-- been scanned.) If 'notifyTruncation' is FALSE, this signal is suppressed.

FinalizeParse: PROCEDURE [pH: ParseHandle];
-- Finalizes the parser instance specified by 'pH'. This procedure must be
-- called when the client has finished parsing, whether because of normal
-- completion or because some error has occurred. After calling this procedure,
-- 'pH' is no longer meaningful and must not be reused.
-- Note: FinalizeParse must not be called while a call to ParseNameList is
-- pending for the same ParseHandle.

GetFieldName: PROCEDURE [pH: ParseHandle, fieldNameOut: STRING]
RETURNS [found: BOOLEAN];
-- GetFieldName presumes that 'next' (see InitializeParse) is positioned to read the
-- first character of a field name and returns the field name, without the
-- terminating colon, in 'fieldNameOut'. GetFieldName leaves 'next' ready to
-- return the first character following the colon (or, if the end of the message
-- header has been reached, the character (if any) after the two CRs that normally
-- terminate the header).
-- Upon return, 'found' is FALSE if no field names remain in the header.
-- If the field name is too long for 'fieldNameOut', the behavior of
-- GetFieldName depends upon the 'notifyTruncation' parameter passed to
-- InitializeParse (ParseError[truncated] is raised only if it was TRUE).
-- If the header field ends prematurely or illegal header characters are
-- encountered, ParseError[badFieldName] is raised.

GetFieldBody: PROCEDURE [
pH: ParseHandle, fieldBodyOut: STRING, suppressWhiteSpace: BOOLEAN ← FALSE];
-- The (remainder of the) current field body is read using 'next' (see
-- InitializeParse) and is returned in 'fieldBodyOut'.
-- Upon return, 'fieldBodyOut' has no initial or terminal white space (blanks and
-- tabs) and, if 'suppressWhiteSpace' is TRUE, each internal run of white space
-- has been replaced by a single blank. ArpaNet folding conventions are also
-- observed.
-- If the field body is too long for 'fieldBodyOut', the behavior of GetFieldBody
-- depends upon the 'notifyTruncation' parameter passed to InitializeParse.
-- If the field body terminates before a CR is seen, ParseError[badFieldBody] is
-- raised.

ParseNameList: PROCEDURE [
pH: ParseHandle,
process: PROCEDURE [STRING, STRING, STRING, NameInfo] RETURNS [BOOLEAN],
write: PROCEDURE [CHARACTER] ← NIL, suppressWhiteSpace: BOOLEAN ← FALSE];
-- ParseNameList expects to read characters using 'next' (see InitializeParse)
-- for a structured field body consisting of a list of recipient names. For each
-- such name encountered, it will call 'process', passing it three string arguments
-- that designate the simple name, registry, and Arpanet host, in that order. The
-- simple name is always non-empty. If the registry and/or host is absent, a
-- string of length zero (not NIL) is passed. The string parameters are free from
-- leading, trailing, and excess internal white space, and are guaranteed to be at
-- least 'maxRecipientLength' characters in length.
-- NOTE(review): "in length" presumably refers to the capacity of the strings
-- rather than to their current contents (a registry or host may be empty);
-- confirm against the implementation.
-- The 'process' routine has a fourth parameter which provides additional
-- information about the name being supplied (see the description of the NameInfo
-- type, above, for details).
-- The registry and Arpanet host parameters (but not the simple name) may be
-- changed by 'process' to alter the qualification of the simple name. This
-- operates as follows: If 'process' returns TRUE, ParseNameList will output the
-- complete name, with potentially altered qualification, by calling 'write'
-- successively with each character. ParseNameList will attempt to preserve the
-- original format of the name as much as possible, including bracketing, comments,
-- and white space, unless 'suppressWhiteSpace' is TRUE. In the latter case,
-- non-significant white space will be eliminated and significant white space
-- (outside of quoted strings) will be replaced by a single space. If 'process'
-- returns FALSE or if 'write' is defaulted, no output occurs. ParseNameList
-- assumes responsibility for outputting appropriate separators (commas) and
-- brackets, based on the values returned by successive invocations of 'process'.
-- If any syntax errors are detected during parsing, ParseError[badFieldBody] is
-- raised. It is legitimate for the 'process' routine to raise a signal that
-- causes ParseNameList to be unwound.

GetListOrGroupName: PROCEDURE [pH: ParseHandle, name: STRING];
-- This procedure can only reasonably be called from inside the 'process'
-- procedure passed to ParseNameList. The <phrase> that introduces the current
-- list or group is read using 'next' (see InitializeParse) and is returned in
-- 'name'.
-- Upon return, 'name' has no initial or terminal white space (blanks and tabs)
-- and each internal run of white space has been replaced by a single blank.
-- If the name is too long for 'name', the behavior of GetListOrGroupName depends
-- upon the 'notifyTruncation' parameter passed to InitializeParse.
-- If GetListOrGroupName is called at an inappropriate time (e.g., when
-- NameInfo.nesting = none), the empty string is returned.

GetTag: PROCEDURE [pH: ParseHandle, tag: STRING];
-- This procedure can only reasonably be called from inside the 'process'
-- procedure passed to ParseNameList. The <atom> appearing within colons that
-- introduces the current name is read using 'next' (see InitializeParse) and is
-- returned in 'tag'. (If a sequence of such atoms, each within colons,
-- precedes the current name, GetTag constructs a <phrase> consisting of this
-- sequence of atoms separated by spaces and returns it in 'tag'.)
-- Upon return, 'tag' has no initial or terminal white space (blanks and tabs).
-- If the atom is too long for 'tag', the behavior of GetTag depends upon the
-- 'notifyTruncation' parameter passed to InitializeParse.
-- If GetTag is called at an inappropriate time (e.g., when
-- NameInfo.hasTag = FALSE), the empty string is returned.

END.