DIRECTORY GVMailParse USING [endOfInput, endOfList, ParseErrorCode], IO, Rope; GVMailParseImpl: CEDAR PROGRAM IMPORTS IO, Rope EXPORTS GVMailParse = BEGIN OPEN GVMailParse, Rope, IO; openComment: CHAR = '(; closeComment: CHAR = '); quoteNext: CHAR = '\\; quotes: CHAR = '"; comma: CHAR = ',; colon: CHAR = ':; semiColon: CHAR = ';; openRoute: CHAR = '<; closeRoute: CHAR = '>; openSquareBracket: CHAR = '[; closeSquareBracket: CHAR = ']; dot: CHAR = '.; atSign: CHAR = '@; TokenType: TYPE = {atom, dot, atSign, comma, colon, semiColon, openRoute, closeRoute, domainLiteral, endOfLine, endOfList, endOfInput}; ParseInfo: PUBLIC TYPE = RECORD [ strm, tokenStrm: IO.STREAM, giveACR: BOOL _ FALSE, char: CHAR _ NUL]; ParseHandle: TYPE = REF ParseInfo; ParseError: PUBLIC ERROR [code: ParseErrorCode] = CODE; SyntaxError: ERROR = CODE; InitializeParse: PUBLIC PROC RETURNS [ph: ParseHandle] = { ph _ NEW[ParseInfo_ [strm: ROS[], tokenStrm: ROS[]]] }; -- of InitializeParse FinalizeParse: PUBLIC PROC [ph: ParseHandle] = { ph.strm.Close[]; ph.tokenStrm.Close[]; }; -- of FinalizeParse -- GetFieldName: PUBLIC PROC [ph: ParseHandle, next: PROC RETURNS [CHAR]] RETURNS [fieldName: ROPE, found: BOOL] = { char: CHAR; blanks: BOOL _ FALSE; ph.strm.Reset[]; DO SELECT char _ Get[ph, next] FROM CR, endOfInput => { fieldName_ RopeFromROS[ph.strm, FALSE]; IF fieldName.Length[] = 0 THEN RETURN[fieldName, FALSE] ELSE ParseError[badFieldName]; }; ': => RETURN[RopeFromROS[ph.strm, FALSE], TRUE]; SP, TAB => blanks _ TRUE; endOfList, < 40C => ERROR ParseError[badFieldName]; ENDCASE => SELECT TRUE FROM blanks => ERROR ParseError[badFieldName]; ENDCASE => ph.strm.PutChar[char]; ENDLOOP; }; -- of GetFieldName -- GetFieldBody: PUBLIC PROC [ph: ParseHandle, next: PROC RETURNS [CHAR], suppressBody: BOOL_ FALSE, suppressWhiteSpace: BOOL _ FALSE] RETURNS [fieldBody: ROPE] = { char: CHAR; spaceSeen: BOOL _ TRUE; -- TRUE means ignore leading spaces RemoveTrailingSpace: PROC = INLINE BEGIN len: INT_ fieldBody.Length[]; WHILE len > 0 AND fieldBody.Fetch[len-1] = SP DO len_ len - 1; ENDLOOP; fieldBody_ fieldBody.Substr[0, len]; END; -- of RemoveTrailingSpace -- IF ~suppressBody THEN ph.strm.Reset[]; DO SELECT char _ Get[ph, next] FROM SP, TAB => BEGIN IF spaceSeen THEN LOOP; IF suppressWhiteSpace THEN {char _ SP; spaceSeen _ TRUE}; END; endOfInput => GO TO Trouble; endOfList, CR => EXIT; ENDCASE => spaceSeen _ FALSE; IF ~suppressBody THEN ph.strm.PutChar[char]; ENDLOOP; IF ~suppressBody THEN { fieldBody_ RopeFromROS[ph.strm, FALSE]; RemoveTrailingSpace[]}; EXITS Trouble => ERROR ParseError[badFieldBody]; }; -- of GetFieldBody -- ParseNameList: PUBLIC PROC [ph: ParseHandle, next: PROC RETURNS[CHAR], process: PROC [ROPE, ROPE, BOOL, BOOL] RETURNS [ROPE, BOOL], write: PROC [CHAR] _ NIL ] = { error: ParseErrorCode _ none; name, registry, token: ROPE; reg: ROPE; doWrite: BOOL; outputStrm: IO.STREAM_ IF write = NIL THEN NIL ELSE ph.strm; tokenStrm: IO.STREAM_ ph.tokenStrm; dotIndex, lastAtomIndex: CARDINAL _ 0; ParseList: PROC = { lookingFor: {name, delim, registry, groupContents, routeAddress}; inRoute, inGroup, needAtSign, seenAtSign: BOOL; oldRegistryLength: CARDINAL; haveAlreadyWritten: BOOL _ FALSE; AppendNameAndRegistry: PROC = { IF registry.Length[] > 0 THEN {name_ name.Cat[".", registry]; registry_ NIL} }; CheckForArpa: PROC = { arpaRegistry: ROPE = "ARPA"; IF registry.Length[] = 0 THEN { dotIndex _ 0; IF seenAtSign THEN registry _ arpaRegistry; }; oldRegistryLength _ registry.Length[]; }; -- of CheckForArpa -- ProcessPhrase: PROC = { IF inRoute THEN SyntaxError; name_ registry_ NIL; IF outputStrm # NIL THEN {outputStrm.PutChar[SP]; outputStrm.PutRope[token]}; DO -- Flush till colon or angle bracket. SELECT GetToken[] FROM colon => BEGIN inGroup _ TRUE; lookingFor _ groupContents; IF outputStrm # NIL THEN outputStrm.PutChar[':]; EXIT; END; openRoute => BEGIN lookingFor _ routeAddress; inRoute _ TRUE; IF outputStrm # NIL THEN {outputStrm.PutChar[SP]; outputStrm.PutChar['<]}; EXIT; END; atom => IF outputStrm # NIL THEN {outputStrm.PutChar[SP]; outputStrm.PutRope[token]}; dot => IF outputStrm # NIL THEN outputStrm.PutChar['.]; ENDCASE => SyntaxError; ENDLOOP; }; -- of ProcessPhrase -- ProcessRoute: PROC = { name_ name.Concat["@"]; needAtSign _ TRUE; IF outputStrm # NIL THEN outputStrm.PutChar['@]; DO SELECT GetToken[] FROM atom, domainLiteral => BEGIN name_ name.Concat[token]; IF outputStrm # NIL THEN outputStrm.PutRope[token]; END; ENDCASE => SyntaxError; SELECT GetToken[] FROM dot => BEGIN name_ name.Concat["."]; IF outputStrm # NIL THEN outputStrm.PutChar['.]; END; comma => IF GetToken[] = atSign THEN BEGIN name_ name.Concat[",@"]; IF outputStrm # NIL THEN outputStrm.PutRope[",@"]; END ELSE SyntaxError; colon => BEGIN name_ name.Concat[":"]; IF outputStrm # NIL THEN outputStrm.PutChar[':]; lookingFor _ name; EXIT; END; ENDCASE => SyntaxError; ENDLOOP; }; -- of ProcessRoute -- GetAtom: PROC = { tooLong: BOOL _ FALSE; DO char: CHAR; SELECT char _ Get[ph, next] FROM SP, TAB => EXIT; dot, atSign, comma, openRoute, closeRoute, endOfList, colon, semiColon, endOfInput, openSquareBracket, openComment, quotes, closeSquareBracket, closeComment => {ph.char _ char; EXIT}; CR => {ph.giveACR _ TRUE; EXIT}; < 40C, quoteNext, DEL => ERROR SyntaxError; ENDCASE => tokenStrm.PutChar[char]; ENDLOOP; }; -- of GetAtom -- GetQuotedString: PROC = { tooLong: BOOL _ FALSE; DO char: CHAR; tokenStrm.PutChar[char _ Get[ph, next]]; SELECT char FROM quoteNext => SELECT char _ Get[ph, next] FROM CR, endOfList, endOfInput => ERROR SyntaxError; ENDCASE => tokenStrm.PutChar[char]; quotes => EXIT; CR, endOfList, endOfInput => ERROR SyntaxError; ENDCASE; ENDLOOP; }; -- of GetQuotedString -- GetDomainLiteral: PROC = { DO char: CHAR; tokenStrm.PutChar[char _ Get[ph, next]]; SELECT char FROM quoteNext => SELECT char _ Get[ph, next] FROM CR, endOfList, endOfInput => ERROR SyntaxError; ENDCASE => tokenStrm.PutChar[char]; closeSquareBracket => RETURN; CR, endOfList, endOfInput => ERROR SyntaxError; ENDCASE; ENDLOOP; EXITS }; -- of GetDomainLiteral -- GetToken: PROC RETURNS [tokenType: TokenType] = { char: CHAR; tokenStrm.Reset[]; DO char _ Get[ph, next]; SELECT char FROM SP, TAB => LOOP; openComment => {FlushComment[]; LOOP}; ENDCASE => EXIT; ENDLOOP; SELECT char FROM dot => RETURN[dot]; atSign => RETURN[atSign]; comma => RETURN[comma]; openRoute => RETURN[openRoute]; closeRoute => RETURN[closeRoute]; CR => RETURN[endOfLine]; endOfList => RETURN[endOfList]; endOfInput => RETURN[endOfInput]; colon => RETURN[colon]; semiColon => RETURN[semiColon]; closeSquareBracket, closeComment => ERROR SyntaxError; openSquareBracket => {tokenStrm.PutChar[char]; GetDomainLiteral[]; tokenType_ domainLiteral}; quotes => {tokenStrm.PutChar[char]; GetQuotedString[]; tokenType_ atom}; ENDCASE => {tokenStrm.PutChar[char]; GetAtom[]; tokenType_ atom}; token_ RopeFromROS[tokenStrm, FALSE]; }; -- of GetToken -- FlushComment: PROC = { char: CHAR; len: INT; IF outputStrm # NIL THEN BEGIN r: ROPE_ RopeFromROS[outputStrm, FALSE]; IF (len_ r.Length[]) > 0 THEN IF r.Fetch[len-1] # SP THEN outputStrm.PutChar[SP]; outputStrm.PutChar['(]; END; DO SELECT (char _ Get[ph, next]) FROM quoteNext => SELECT (char _ Get[ph, next]) FROM CR, endOfList, endOfInput => ERROR SyntaxError; ENDCASE => {outputStrm.PutChar['\\]; outputStrm.PutChar[char]}; closeComment => {IF outputStrm # NIL THEN outputStrm.PutRope[") "]; RETURN}; openComment => FlushComment[]; CR, endOfList, endOfInput => ERROR SyntaxError; ENDCASE => IF outputStrm # NIL THEN outputStrm.PutChar[char]; ENDLOOP; }; -- of FlushComment -- WriteOutput: PROC = { output: ROPE_ RopeFromROS[outputStrm, FALSE]; len, regLen: INT; registryIndex: CARDINAL _ IF dotIndex # 0 THEN lastAtomIndex - oldRegistryLength ELSE 0; IF haveAlreadyWritten THEN {write[',]; write[SP]}; haveAlreadyWritten _ TRUE; FOR i: INT IN [0 .. IF dotIndex = 0 THEN lastAtomIndex ELSE dotIndex) DO write[output.Fetch[i]]; ENDLOOP; IF (regLen_ reg.Length[]) > 0 THEN write['.]; FOR i: INT IN [0 .. regLen) DO write[reg.Fetch[i]]; ENDLOOP; IF dotIndex # 0 THEN FOR i: CARDINAL IN (dotIndex .. registryIndex) DO write[output.Fetch[i]]; ENDLOOP; len_ output.Length[]; WHILE len > 0 AND output.Fetch[len-1] = SP DO len_ len - 1; ENDLOOP; FOR i: INT IN [lastAtomIndex .. len) DO write[output.Fetch[i]]; ENDLOOP; }; -- of WriteOutput -- inGroup _ FALSE; DO -- for each list element. name_ registry_ NIL; dotIndex _ lastAtomIndex _ 0; lookingFor _ name; inRoute _ needAtSign _ seenAtSign _ FALSE; DO -- for tokens within a list element. BEGIN -- for EXITS -- tokenType: TokenType; SELECT (tokenType _ GetToken[]) FROM atom => SELECT lookingFor FROM name, registry => BEGIN IF lookingFor = name THEN name_ name.Concat[token] ELSE registry_ registry.Concat[token]; lookingFor _ delim; IF outputStrm # NIL THEN { outputStrm.PutRope[token]; lastAtomIndex _ RopeFromROS[outputStrm, FALSE].Length[] }; END; groupContents, routeAddress => BEGIN name_ registry_ NIL; needAtSign _ seenAtSign _ FALSE; name_ name.Concat[token]; lookingFor _ delim; IF outputStrm # NIL THEN BEGIN IF lookingFor = groupContents THEN outputStrm.PutChar[SP]; outputStrm.PutRope[token]; lastAtomIndex_ RopeFromROS[outputStrm, FALSE].Length[]; END; END; delim => GO TO SecondChance; ENDCASE; dot => IF lookingFor = delim THEN BEGIN AppendNameAndRegistry[]; lookingFor _ registry; IF outputStrm # NIL THEN { dotIndex_ RopeFromROS[outputStrm, FALSE].Length[]; outputStrm.PutChar['.] }; END ELSE SyntaxError; atSign => SELECT lookingFor FROM delim => BEGIN IF seenAtSign THEN SyntaxError; seenAtSign _ TRUE; AppendNameAndRegistry[]; name_ name.Concat["@"]; lookingFor _ name; dotIndex _ lastAtomIndex _ 0; IF outputStrm # NIL THEN outputStrm.PutChar['@]; END; routeAddress => ProcessRoute[]; name, registry, groupContents => SyntaxError; ENDCASE; comma, endOfLine, endOfList, endOfInput => BEGIN IF inRoute OR (tokenType # comma AND inGroup) THEN SyntaxError; SELECT lookingFor FROM delim => BEGIN CheckForArpa[]; [reg, doWrite]_ process[name, registry, FALSE, inGroup]; IF doWrite AND outputStrm # NIL THEN WriteOutput[]; IF outputStrm # NIL THEN outputStrm.Reset[]; IF tokenType = comma THEN EXIT ELSE RETURN; END; name, groupContents => BEGIN IF inRoute OR seenAtSign THEN SyntaxError; IF registry.Length[] > 0 THEN ERROR; IF tokenType = comma THEN {lookingFor _ name; EXIT} ELSE IF inGroup THEN SyntaxError ELSE RETURN; END; registry, routeAddress => SyntaxError; ENDCASE; END; colon => SELECT lookingFor FROM delim, registry => BEGIN IF inRoute OR inGroup OR seenAtSign THEN SyntaxError; AppendNameAndRegistry[]; inGroup _ TRUE; lookingFor _ groupContents; IF outputStrm # NIL THEN outputStrm.PutChar[':]; dotIndex _ lastAtomIndex _ 0; END; name, groupContents, routeAddress => SyntaxError; ENDCASE; semiColon => BEGIN emptyGroup: BOOL _ FALSE; IF ~inGroup OR inRoute THEN SyntaxError; inGroup _ FALSE; SELECT lookingFor FROM groupContents => {IF name.Length[] = 0 THEN SyntaxError; emptyGroup _ TRUE}; delim => {CheckForArpa[]; emptyGroup _ FALSE}; name => IF name.Length[] > 0 THEN SyntaxError; registry, routeAddress => SyntaxError; ENDCASE; IF outputStrm # NIL THEN outputStrm.PutChar[';]; SELECT (tokenType _ GetToken[]) FROM comma, endOfLine, endOfList, endOfInput => BEGIN IF name.Length[] > 0 THEN BEGIN [reg, doWrite]_ process[name, registry, emptyGroup, TRUE]; IF doWrite AND outputStrm # NIL THEN WriteOutput[]; END ELSE IF outputStrm # NIL THEN BEGIN registry_ NIL; lastAtomIndex _ dotIndex _ 0; WriteOutput[]; END; IF outputStrm # NIL THEN outputStrm.Reset[]; IF tokenType = comma THEN EXIT ELSE RETURN; END; ENDCASE => SyntaxError; END; openRoute => BEGIN IF inRoute THEN SyntaxError; inRoute _ TRUE; name_ registry_ NIL; dotIndex _ lastAtomIndex _ 0; lookingFor _ routeAddress; IF outputStrm # NIL THEN BEGIN len: INT; r: ROPE; IF (len_ (r_ RopeFromROS[outputStrm, FALSE]).Length[]) > 0 THEN IF r.Fetch[len-1] # SP THEN outputStrm.PutChar[SP]; outputStrm.PutChar['<]; END; END; closeRoute => BEGIN IF ~inRoute OR lookingFor # delim THEN SyntaxError; inRoute _ FALSE; IF needAtSign AND ~seenAtSign THEN SyntaxError; CheckForArpa[]; IF outputStrm # NIL THEN outputStrm.PutChar['>]; SELECT (tokenType _ GetToken[]) FROM comma, endOfLine, endOfList, endOfInput, semiColon => BEGIN IF inGroup AND ((tokenType # comma) AND (tokenType # semiColon)) THEN SyntaxError; IF outputStrm # NIL AND tokenType=semiColon THEN outputStrm.PutChar[';]; [reg, doWrite]_ process[name, registry, FALSE, TRUE]; IF doWrite AND outputStrm # NIL THEN WriteOutput[]; IF outputStrm # NIL THEN outputStrm.Reset[]; IF tokenType = semiColon THEN {inGroup_ FALSE; EXIT}; IF tokenType = comma THEN EXIT ELSE RETURN; END; ENDCASE => SyntaxError; END; domainLiteral => SELECT lookingFor FROM name, registry => BEGIN IF ~seenAtSign THEN SyntaxError; IF lookingFor = name THEN name_ name.Concat[token] ELSE registry_ registry.Concat[token]; lookingFor_ delim; IF outputStrm # NIL THEN { outputStrm.PutRope[token]; dotIndex_ 0; lastAtomIndex_ RopeFromROS[outputStrm, FALSE].Length[] }; END; delim, groupContents, routeAddress => SyntaxError; ENDCASE; ENDCASE => ERROR; EXITS SecondChance => ProcessPhrase[]; END; -- of EXITS block -- ENDLOOP; ENDLOOP; -- for each list element. }; -- of ParseList -- IF outputStrm # NIL THEN outputStrm.Reset[]; ParseList[ ! SyntaxError => {error _ badFieldBody; CONTINUE}]; IF error # none THEN ERROR ParseError[error]; }; -- of ParseNameList -- Get: PROC [ph: ParseHandle, next: PROC RETURNS[CHAR]] RETURNS [char: CHAR] = { IF ph.giveACR THEN {ph.giveACR _ FALSE; RETURN[CR]}; IF ph.char = NUL THEN BEGIN IF (char _ next[]) = CR THEN BEGIN ph.char _ next[]; SELECT ph.char FROM SP, TAB => {ph.char _ NUL; RETURN[SP]}; ENDCASE; END; END ELSE {char _ ph.char; ph.char _ NUL}; }; -- of Get -- END. $GVMailParseImpl.mesa Copyright c 1985 by Xerox Corporation. All rights reserved. Based on ArpaMailParser.mesa by Brotz, March 6, 1983 3:29 PM Last Edited by: Willie-sue, May 30, 1985 1:15:20 pm PDT Doug Wyatt, March 7, 1985 11:07:34 am PST Types Some special characters specified by RFC 822 Initializes the header parser, and returns a ParseHandle which is to be passed to all other procedures of this interface. Subsequent invocations of GetFieldName, GetFieldBody, and ParseNameList will obtain their input using "next". If "notifyTruncation" is TRUE, GetFieldName and GetFieldBody will raise ParseError[truncated] if the string they are collecting overflows the string provided. (The signal is not raised until the entire field name or body has been scanned.) If "notifyTruncation" is FALSE, this signal is suppressed. Finalizes the parser instance specified by "pH". This procedure must be called when the client has finished parsing, either because of normal completion or because some error has occurred. After calling this procedure, "pH" is no longer meaningful and must not be reused. Note: FinalizeParse may not be called while a call to ParseNameList is pending (for the same ParseHandle). GetFieldName presumes that "next" (see InitializeParse) is positioned to read the first character of a field name and returns the field name, without the terminating colon, as "fieldName". GetFieldName leaves "next" ready to return the first character following the colon (or, if the end of the message header has been reached, the character (if any) after the two CRs that normally terminate the header). Upon return, "found" is FALSE if no field names remain in the header. If the header field ends prematurely or illegal header characters are encountered, ParseError[badFieldName] is raised. The (remainder of the) current field body is read using "next" (see InitializeParse) and is returned as "fieldBody". If the field body terminates before a CR is seen, ParseError[badFieldBody] is raised. Upon return, "fieldBody" has no initial or terminal white space (blanks and tabs) and, if "suppressWhiteSpace" is TRUE, each internal run of white space has been replaced by a single blank. ArpaNet folding conventions are also observed. IF suppressBody is TRUE, then no fieldBody is generated process PROC [simpleName, registry, isFile, isNested] RETURNS [reg, write] -- ParseNameList expects to read characters using "next" (see InitializeParse) for a structured field body consisting of a list of recipient names. For each such name encountered, it will call "process", passing it two string arguments that designate the simple name and registry. The simple name is always non-empty. If the registry is absent, a string of length zero (not NIL) is passed. If the simple name contains an Arpanet host name, the registry passed is "Arpa". The rope parameters are free from leading, trailing and excess internal white space The "process" routine has a third parameter that indicates, if TRUE, that the simple name is a file name, if FALSE, that the simple name and registry combine to form a normal name. The fourth parameter supplied to "process" indicates, if TRUE, that the name was "nested", i.e., it occurred within brackets or within a group. This is useful to the Answer client who may wish to suppress duplicate elimination in such cases. If any syntax errors are detected during parsing, ParseError[badFieldBody] is raised. It is legitimate for the "process" routine to raise a signal that causes ParseNameList to be unwound. Local procedure ParseList does all the work. It is a local procedure so we can catch its ERRORs easily. Semantics of states: name: expect the first atom of a name or the first atom of a domain. these can be distinguished by the seenAtSign Boolean. delim: have just seen an atom, expect a delimiter: dot, atSign, closeRoute, semiColon, endOfLine, or endOfList. registry: have just seen a dot, now expect an atom that is a registry candidate. groupContents: have just seen a group opening colon. This state is ephemeral-we are looking for an immediate semiColon that will indicate that the name accumulated so far is a filename. If the next token is not a semiColon, treat as lookingFor=name (but clear the accumulated name first). routeAddress: have just seen an openRoute. Handle the awful route syntax: @domain,@domain, ... @domain: If not immediately followed by an atSign, treat as lookingFor=name. Typical sequence of states is name, delim, (if see dot:)[ registry, delim, (if see atSign:)[name, delim,[registry, delim]*]] (dotIndex # 0) = registry exists in the output string. locate the registry in the output string write up to the dot write the dot write the registry write the comments between the dot and the registry write the rest abnormal cases follow. be careful, might have no name if extra trailing comma preceded the semi. ASSERT: CheckForArpa was called if emptyGroup is FALSE. this next line is a kludge of the worse kind!!! Body of ParseNameList Obtains next input character and smoothes over a few lexical quirks. This procedure deals with Arpa-standard line-folding. Κ˜codešœ™Kšœ Οmœ1™Kšžœžœžœ˜-Kšœ‘˜——˜š œžœžœžœžœžœžœ˜NKšœT™TKšœ&™&Kš žœ žœžœžœžœ˜4šžœ žœž˜Kšž˜šžœžœž˜Kšž˜K˜šžœ ž˜Kš žœžœžœžœžœ˜'Kšžœ˜—Kšžœ˜—Kšž˜—Kšžœžœ˜%Kšœ‘ ˜——K˜Kšžœ˜—…—6*b_