<> <> <> <> <> DIRECTORY GVMailParse USING [endOfInput, endOfList, ParseErrorCode], IO, Rope; GVMailParseImpl: CEDAR PROGRAM IMPORTS IO, Rope EXPORTS GVMailParse = BEGIN OPEN GVMailParse, Rope, IO; <> <> openComment: CHAR = '(; closeComment: CHAR = '); quoteNext: CHAR = '\\; quotes: CHAR = '"; comma: CHAR = ',; colon: CHAR = ':; semiColon: CHAR = ';; openRoute: CHAR = '<; closeRoute: CHAR = '>; openSquareBracket: CHAR = '[; closeSquareBracket: CHAR = ']; dot: CHAR = '.; atSign: CHAR = '@; TokenType: TYPE = {atom, dot, atSign, comma, colon, semiColon, openRoute, closeRoute, domainLiteral, endOfLine, endOfList, endOfInput}; ParseInfo: PUBLIC TYPE = RECORD [ strm, tokenStrm: IO.STREAM, giveACR: BOOL _ FALSE, char: CHAR _ NUL]; ParseHandle: TYPE = REF ParseInfo; ParseError: PUBLIC ERROR [code: ParseErrorCode] = CODE; SyntaxError: ERROR = CODE; InitializeParse: PUBLIC PROC RETURNS [ph: ParseHandle] = <> <> <> <> <> <> <> { ph _ NEW[ParseInfo_ [strm: ROS[], tokenStrm: ROS[]]] }; -- of InitializeParse FinalizeParse: PUBLIC PROC [ph: ParseHandle] = { <> <> <> <> <> ph.strm.Close[]; ph.tokenStrm.Close[]; }; -- of FinalizeParse -- GetFieldName: PUBLIC PROC [ph: ParseHandle, next: PROC RETURNS [CHAR]] RETURNS [fieldName: ROPE, found: BOOL] = { <> <> <> <> <> <> <> <> char: CHAR; blanks: BOOL _ FALSE; ph.strm.Reset[]; DO SELECT char _ Get[ph, next] FROM CR, endOfInput => { fieldName_ RopeFromROS[ph.strm, FALSE]; IF fieldName.Length[] = 0 THEN RETURN[fieldName, FALSE] ELSE ParseError[badFieldName]; }; ': => RETURN[RopeFromROS[ph.strm, FALSE], TRUE]; SP, TAB => blanks _ TRUE; endOfList, < 40C => ERROR ParseError[badFieldName]; ENDCASE => SELECT TRUE FROM blanks => ERROR ParseError[badFieldName]; ENDCASE => ph.strm.PutChar[char]; ENDLOOP; }; -- of GetFieldName -- GetFieldBody: PUBLIC PROC [ph: ParseHandle, next: PROC RETURNS [CHAR], suppressBody: BOOL_ FALSE, suppressWhiteSpace: BOOL _ FALSE] RETURNS [fieldBody: ROPE] = { <> <> <> <> <> <> <> char: CHAR; spaceSeen: BOOL _ TRUE; -- TRUE means ignore leading spaces RemoveTrailingSpace: PROC = INLINE BEGIN len: INT_ fieldBody.Length[]; WHILE len > 0 AND fieldBody.Fetch[len-1] = SP DO len_ len - 1; ENDLOOP; fieldBody_ fieldBody.Substr[0, len]; END; -- of RemoveTrailingSpace -- IF ~suppressBody THEN ph.strm.Reset[]; DO SELECT char _ Get[ph, next] FROM SP, TAB => BEGIN IF spaceSeen THEN LOOP; IF suppressWhiteSpace THEN {char _ SP; spaceSeen _ TRUE}; END; endOfInput => GO TO Trouble; endOfList, CR => EXIT; ENDCASE => spaceSeen _ FALSE; IF ~suppressBody THEN ph.strm.PutChar[char]; ENDLOOP; IF ~suppressBody THEN { fieldBody_ RopeFromROS[ph.strm, FALSE]; RemoveTrailingSpace[]}; EXITS Trouble => ERROR ParseError[badFieldBody]; }; -- of GetFieldBody -- ParseNameList: PUBLIC PROC [ph: ParseHandle, next: PROC RETURNS[CHAR], process: PROC [ROPE, ROPE, BOOL, BOOL] RETURNS [ROPE, BOOL], <> write: PROC [CHAR] _ NIL ] = { <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> error: ParseErrorCode _ none; name, registry, token: ROPE; reg: ROPE; doWrite: BOOL; outputStrm: IO.STREAM_ IF write = NIL THEN NIL ELSE ph.strm; tokenStrm: IO.STREAM_ ph.tokenStrm; dotIndex, lastAtomIndex: CARDINAL _ 0; <> <> ParseList: PROC = { lookingFor: {name, delim, registry, groupContents, routeAddress}; <> <> <> <> <> <> <> <> <> <> <> <<@domain,@domain, ... @domain:>> <> <> <> inRoute, inGroup, needAtSign, seenAtSign: BOOL; oldRegistryLength: CARDINAL; haveAlreadyWritten: BOOL _ FALSE; AppendNameAndRegistry: PROC = { IF registry.Length[] > 0 THEN {name_ name.Cat[".", registry]; registry_ NIL} }; CheckForArpa: PROC = { arpaRegistry: ROPE = "ARPA"; IF registry.Length[] = 0 THEN { dotIndex _ 0; IF seenAtSign THEN registry _ arpaRegistry; }; oldRegistryLength _ registry.Length[]; }; -- of CheckForArpa -- ProcessPhrase: PROC = { IF inRoute THEN SyntaxError; name_ registry_ NIL; IF outputStrm # NIL THEN {outputStrm.PutChar[SP]; outputStrm.PutRope[token]}; DO -- Flush till colon or angle bracket. SELECT GetToken[] FROM colon => BEGIN inGroup _ TRUE; lookingFor _ groupContents; IF outputStrm # NIL THEN outputStrm.PutChar[':]; EXIT; END; openRoute => BEGIN lookingFor _ routeAddress; inRoute _ TRUE; IF outputStrm # NIL THEN {outputStrm.PutChar[SP]; outputStrm.PutChar['<]}; EXIT; END; atom => IF outputStrm # NIL THEN {outputStrm.PutChar[SP]; outputStrm.PutRope[token]}; dot => IF outputStrm # NIL THEN outputStrm.PutChar['.]; ENDCASE => SyntaxError; ENDLOOP; }; -- of ProcessPhrase -- ProcessRoute: PROC = { name_ name.Concat["@"]; needAtSign _ TRUE; IF outputStrm # NIL THEN outputStrm.PutChar['@]; DO SELECT GetToken[] FROM atom, domainLiteral => BEGIN name_ name.Concat[token]; IF outputStrm # NIL THEN outputStrm.PutRope[token]; END; ENDCASE => SyntaxError; SELECT GetToken[] FROM dot => BEGIN name_ name.Concat["."]; IF outputStrm # NIL THEN outputStrm.PutChar['.]; END; comma => IF GetToken[] = atSign THEN BEGIN name_ name.Concat[",@"]; IF outputStrm # NIL THEN outputStrm.PutRope[",@"]; END ELSE SyntaxError; colon => BEGIN name_ name.Concat[":"]; IF outputStrm # NIL THEN outputStrm.PutChar[':]; lookingFor _ name; EXIT; END; ENDCASE => SyntaxError; ENDLOOP; }; -- of ProcessRoute -- GetAtom: PROC = { tooLong: BOOL _ FALSE; DO char: CHAR; SELECT char _ Get[ph, next] FROM SP, TAB => EXIT; dot, atSign, comma, openRoute, closeRoute, endOfList, colon, semiColon, endOfInput, openSquareBracket, openComment, quotes, closeSquareBracket, closeComment => {ph.char _ char; EXIT}; CR => {ph.giveACR _ TRUE; EXIT}; < 40C, quoteNext, DEL => ERROR SyntaxError; ENDCASE => tokenStrm.PutChar[char]; ENDLOOP; }; -- of GetAtom -- GetQuotedString: PROC = { tooLong: BOOL _ FALSE; DO char: CHAR; tokenStrm.PutChar[char _ Get[ph, next]]; SELECT char FROM quoteNext => SELECT char _ Get[ph, next] FROM CR, endOfList, endOfInput => ERROR SyntaxError; ENDCASE => tokenStrm.PutChar[char]; quotes => EXIT; CR, endOfList, endOfInput => ERROR SyntaxError; ENDCASE; ENDLOOP; }; -- of GetQuotedString -- GetDomainLiteral: PROC = { DO char: CHAR; tokenStrm.PutChar[char _ Get[ph, next]]; SELECT char FROM quoteNext => SELECT char _ Get[ph, next] FROM CR, endOfList, endOfInput => ERROR SyntaxError; ENDCASE => tokenStrm.PutChar[char]; closeSquareBracket => RETURN; CR, endOfList, endOfInput => ERROR SyntaxError; ENDCASE; ENDLOOP; EXITS }; -- of GetDomainLiteral -- GetToken: PROC RETURNS [tokenType: TokenType] = { char: CHAR; tokenStrm.Reset[]; DO char _ Get[ph, next]; SELECT char FROM SP, TAB => LOOP; openComment => {FlushComment[]; LOOP}; ENDCASE => EXIT; ENDLOOP; SELECT char FROM dot => RETURN[dot]; atSign => RETURN[atSign]; comma => RETURN[comma]; openRoute => RETURN[openRoute]; closeRoute => RETURN[closeRoute]; CR => RETURN[endOfLine]; endOfList => RETURN[endOfList]; endOfInput => RETURN[endOfInput]; colon => RETURN[colon]; semiColon => RETURN[semiColon]; closeSquareBracket, closeComment => ERROR SyntaxError; openSquareBracket => {tokenStrm.PutChar[char]; GetDomainLiteral[]; tokenType_ domainLiteral}; quotes => {tokenStrm.PutChar[char]; GetQuotedString[]; tokenType_ atom}; ENDCASE => {tokenStrm.PutChar[char]; GetAtom[]; tokenType_ atom}; token_ RopeFromROS[tokenStrm, FALSE]; }; -- of GetToken -- FlushComment: PROC = { char: CHAR; len: INT; IF outputStrm # NIL THEN BEGIN r: ROPE_ RopeFromROS[outputStrm, FALSE]; IF (len_ r.Length[]) > 0 THEN IF r.Fetch[len-1] # SP THEN outputStrm.PutChar[SP]; outputStrm.PutChar['(]; END; DO SELECT (char _ Get[ph, next]) FROM quoteNext => SELECT (char _ Get[ph, next]) FROM CR, endOfList, endOfInput => ERROR SyntaxError; ENDCASE => {outputStrm.PutChar['\\]; outputStrm.PutChar[char]}; closeComment => {IF outputStrm # NIL THEN outputStrm.PutRope[") "]; RETURN}; openComment => FlushComment[]; CR, endOfList, endOfInput => ERROR SyntaxError; ENDCASE => IF outputStrm # NIL THEN outputStrm.PutChar[char]; ENDLOOP; }; -- of FlushComment -- WriteOutput: PROC = { <<(dotIndex # 0) = registry exists in the output string.>> <> output: ROPE_ RopeFromROS[outputStrm, FALSE]; len, regLen: INT; registryIndex: CARDINAL _ IF dotIndex # 0 THEN lastAtomIndex - oldRegistryLength ELSE 0; IF haveAlreadyWritten THEN {write[',]; write[SP]}; haveAlreadyWritten _ TRUE; <> FOR i: INT IN [0 .. IF dotIndex = 0 THEN lastAtomIndex ELSE dotIndex) DO write[output.Fetch[i]]; ENDLOOP; <> IF (regLen_ reg.Length[]) > 0 THEN write['.]; <> FOR i: INT IN [0 .. regLen) DO write[reg.Fetch[i]]; ENDLOOP; <> IF dotIndex # 0 THEN FOR i: CARDINAL IN (dotIndex .. registryIndex) DO write[output.Fetch[i]]; ENDLOOP; <> len_ output.Length[]; WHILE len > 0 AND output.Fetch[len-1] = SP DO len_ len - 1; ENDLOOP; FOR i: INT IN [lastAtomIndex .. len) DO write[output.Fetch[i]]; ENDLOOP; }; -- of WriteOutput -- inGroup _ FALSE; DO -- for each list element. name_ registry_ NIL; dotIndex _ lastAtomIndex _ 0; lookingFor _ name; inRoute _ needAtSign _ seenAtSign _ FALSE; DO -- for tokens within a list element. BEGIN -- for EXITS -- tokenType: TokenType; SELECT (tokenType _ GetToken[]) FROM atom => SELECT lookingFor FROM name, registry => BEGIN IF lookingFor = name THEN name_ name.Concat[token] ELSE registry_ registry.Concat[token]; lookingFor _ delim; IF outputStrm # NIL THEN { outputStrm.PutRope[token]; lastAtomIndex _ RopeFromROS[outputStrm, FALSE].Length[] }; END; groupContents, routeAddress => BEGIN name_ registry_ NIL; needAtSign _ seenAtSign _ FALSE; name_ name.Concat[token]; lookingFor _ delim; IF outputStrm # NIL THEN BEGIN IF lookingFor = groupContents THEN outputStrm.PutChar[SP]; outputStrm.PutRope[token]; lastAtomIndex_ RopeFromROS[outputStrm, FALSE].Length[]; END; END; <> delim => GO TO SecondChance; ENDCASE; dot => IF lookingFor = delim THEN BEGIN AppendNameAndRegistry[]; lookingFor _ registry; IF outputStrm # NIL THEN { dotIndex_ RopeFromROS[outputStrm, FALSE].Length[]; outputStrm.PutChar['.] }; END ELSE SyntaxError; atSign => SELECT lookingFor FROM delim => BEGIN IF seenAtSign THEN SyntaxError; seenAtSign _ TRUE; AppendNameAndRegistry[]; name_ name.Concat["@"]; lookingFor _ name; dotIndex _ lastAtomIndex _ 0; IF outputStrm # NIL THEN outputStrm.PutChar['@]; END; routeAddress => ProcessRoute[]; name, registry, groupContents => SyntaxError; ENDCASE; comma, endOfLine, endOfList, endOfInput => BEGIN IF inRoute OR (tokenType # comma AND inGroup) THEN SyntaxError; SELECT lookingFor FROM delim => BEGIN CheckForArpa[]; [reg, doWrite]_ process[name, registry, FALSE, inGroup]; IF doWrite AND outputStrm # NIL THEN WriteOutput[]; IF outputStrm # NIL THEN outputStrm.Reset[]; IF tokenType = comma THEN EXIT ELSE RETURN; END; name, groupContents => BEGIN IF inRoute OR seenAtSign THEN SyntaxError; IF registry.Length[] > 0 THEN ERROR; IF tokenType = comma THEN {lookingFor _ name; EXIT} ELSE IF inGroup THEN SyntaxError ELSE RETURN; END; registry, routeAddress => SyntaxError; ENDCASE; END; colon => SELECT lookingFor FROM delim, registry => BEGIN IF inRoute OR inGroup OR seenAtSign THEN SyntaxError; AppendNameAndRegistry[]; inGroup _ TRUE; lookingFor _ groupContents; IF outputStrm # NIL THEN outputStrm.PutChar[':]; dotIndex _ lastAtomIndex _ 0; END; name, groupContents, routeAddress => SyntaxError; ENDCASE; semiColon => BEGIN emptyGroup: BOOL _ FALSE; IF ~inGroup OR inRoute THEN SyntaxError; inGroup _ FALSE; SELECT lookingFor FROM groupContents => {IF name.Length[] = 0 THEN SyntaxError; emptyGroup _ TRUE}; delim => {CheckForArpa[]; emptyGroup _ FALSE}; name => IF name.Length[] > 0 THEN SyntaxError; registry, routeAddress => SyntaxError; ENDCASE; IF outputStrm # NIL THEN outputStrm.PutChar[';]; SELECT (tokenType _ GetToken[]) FROM comma, endOfLine, endOfList, endOfInput => BEGIN IF name.Length[] > 0 THEN BEGIN <> <> [reg, doWrite]_ process[name, registry, emptyGroup, TRUE]; IF doWrite AND outputStrm # NIL THEN WriteOutput[]; END ELSE IF outputStrm # NIL THEN BEGIN registry_ NIL; lastAtomIndex _ dotIndex _ 0; WriteOutput[]; END; IF outputStrm # NIL THEN outputStrm.Reset[]; IF tokenType = comma THEN EXIT ELSE RETURN; END; ENDCASE => SyntaxError; END; openRoute => BEGIN IF inRoute THEN SyntaxError; inRoute _ TRUE; name_ registry_ NIL; dotIndex _ lastAtomIndex _ 0; lookingFor _ routeAddress; IF outputStrm # NIL THEN BEGIN len: INT; r: ROPE; IF (len_ (r_ RopeFromROS[outputStrm, FALSE]).Length[]) > 0 THEN IF r.Fetch[len-1] # SP THEN outputStrm.PutChar[SP]; outputStrm.PutChar['<]; END; END; closeRoute => BEGIN IF ~inRoute OR lookingFor # delim THEN SyntaxError; inRoute _ FALSE; IF needAtSign AND ~seenAtSign THEN SyntaxError; CheckForArpa[]; IF outputStrm # NIL THEN outputStrm.PutChar['>]; SELECT (tokenType _ GetToken[]) FROM comma, endOfLine, endOfList, endOfInput, semiColon => BEGIN IF inGroup AND ((tokenType # comma) AND (tokenType # semiColon)) THEN SyntaxError; <> IF outputStrm # NIL AND tokenType=semiColon THEN outputStrm.PutChar[';]; [reg, doWrite]_ process[name, registry, FALSE, TRUE]; IF doWrite AND outputStrm # NIL THEN WriteOutput[]; IF outputStrm # NIL THEN outputStrm.Reset[]; IF tokenType = semiColon THEN {inGroup_ FALSE; EXIT}; IF tokenType = comma THEN EXIT ELSE RETURN; END; ENDCASE => SyntaxError; END; domainLiteral => SELECT lookingFor FROM name, registry => BEGIN IF ~seenAtSign THEN SyntaxError; IF lookingFor = name THEN name_ name.Concat[token] ELSE registry_ registry.Concat[token]; lookingFor_ delim; IF outputStrm # NIL THEN { outputStrm.PutRope[token]; dotIndex_ 0; lastAtomIndex_ RopeFromROS[outputStrm, FALSE].Length[] }; END; delim, groupContents, routeAddress => SyntaxError; ENDCASE; ENDCASE => ERROR; EXITS SecondChance => ProcessPhrase[]; END; -- of EXITS block -- ENDLOOP; ENDLOOP; -- for each list element. }; -- of ParseList -- <> IF outputStrm # NIL THEN outputStrm.Reset[]; ParseList[ ! SyntaxError => {error _ badFieldBody; CONTINUE}]; IF error # none THEN ERROR ParseError[error]; }; -- of ParseNameList -- Get: PROC [ph: ParseHandle, next: PROC RETURNS[CHAR]] RETURNS [char: CHAR] = { <> <> IF ph.giveACR THEN {ph.giveACR _ FALSE; RETURN[CR]}; IF ph.char = NUL THEN BEGIN IF (char _ next[]) = CR THEN BEGIN ph.char _ next[]; SELECT ph.char FROM SP, TAB => {ph.char _ NUL; RETURN[SP]}; ENDCASE; END; END ELSE {char _ ph.char; ph.char _ NUL}; }; -- of Get -- END.