<> <> <> <> <> <> DIRECTORY ArpaLex822 USING [CharProc, GetSequence, LexFieldName, LexText, LexToken], ArpaMT USING[Direction], ArpaMTMiscOps USING [Lookup, Table], ArpaMTP1 USING [GetEvalResult, InstallParseTable, Parse, ScannerProc, ScanDate, ScanName], ArpaMTParseData, ArpaMTTreeOps: TYPE USING [DefaultDomain, discretionaryBlank, DomainType, TreeToRope, TreeToSimpleRope, ToArpa, ToGrapevine, XeroxDomain], Ascii USING [CR, LF, SP, TAB], IO USING [EndOf, EndOfStream, GetBlock, GetChar, GetIndex, PeekChar, Put, PutBlock, PutChar, PutF, PutFR, PutRope, RIS, rope, SetIndex, STREAM], List USING [Nconc1], RefText USING [ObtainScratch, ReleaseScratch], Rope USING [Cat, Equal, Fetch, Find, IsEmpty, Length, ROPE], PrincOpsUtils USING [Codebase]; ArpaMTMain: CEDAR MONITOR -- Parser has lots of global data IMPORTS ArpaLex822, ArpaMTMiscOps, ArpaMTP1, ArpaMTParseData, ArpaMTTreeOps, IO, List, PrincOpsUtils, RefText, Rope EXPORTS ArpaMT = BEGIN OPEN P1: ArpaMTP1; ROPE: TYPE = Rope.ROPE; <> nameFields: ArpaMTMiscOps.Table ~ LIST["from", "sender", "to", "cc", "bcc", "reply-to", "resent-from", "resent-sender", "resent-to", "resent-cc", "resent-bcc", "resent-reply-to", "remailed-from", "remailed-sender", "remailed-to", "remailed-cc", "remailed-bcc"]; <> msgID: ROPE = "Message-ID"; lineFold: ROPE = "Line-Fold"; <> HeaderRec: TYPE = RECORD[ name: Rope.ROPE, fieldType: {textField, dateField, nameField}, body: REF ANY]; -- Rope for text and whatever ArpaMTP1.GetEvalResult returns for others Info: TYPE = REF InfoRec; InfoRec: PUBLIC TYPE = RECORD [ id: ROPE _ NIL, foldLines: BOOL, -- TRUE if we should fold lines for this message. defaultDomainType: ArpaMTTreeOps.DomainType, -- defaults for translate routines defaultDomain: REF ANY, headers: LIST OF REF ANY ]; -- really LIST OF REF HeaderRec, but I need the List ops <> <<>> foldLength: INT _ 80; -- How long to make folded lines. alreadyFoldedLength: INT _ 90; -- Max a line can be and still appear to be folded. foldedLookahead: INT _ 5000; -- Look at this many chars to decide if folded. maxAllowedBlackLength: INT _ 200; -- Probably data. Don't fold. (Line fold code crashes on some data files) InitParser: PROC = TRUSTED { <> P1.InstallParseTable[LOOPHOLE[PrincOpsUtils.Codebase[LOOPHOLE[ArpaMTParseData]]]]; }; IsNameFieldHeader: PROC [fieldName: Rope.ROPE] RETURNS [BOOL] = { <> RETURN [ArpaMTMiscOps.Lookup[fieldName, nameFields]]; }; Fold: PROC [in: IO.STREAM] RETURNS [fold: BOOL] = { <> pos: INT _ in.GetIndex[]; curLength, curBlackLength, maxLength, maxBlackLength: INT _ 0; count: INT _ 0; WHILE count < foldedLookahead AND ~in.EndOf[] DO char: CHAR _ in.GetChar[]; IF char = Ascii.CR THEN { IF curLength > maxLength THEN maxLength _ curLength; IF curBlackLength > maxBlackLength THEN maxBlackLength _ curBlackLength; curLength _ 0; curBlackLength _ 0} ELSE { curLength _ curLength+1; IF char = Ascii.SP OR char = Ascii.TAB THEN { IF curBlackLength > maxBlackLength THEN maxBlackLength _ curBlackLength; curBlackLength _ 0; } ELSE curBlackLength _ curBlackLength+1; }; count _ count+1; ENDLOOP; IF curLength > maxLength THEN maxLength _ curLength; IF curBlackLength > maxBlackLength THEN maxBlackLength _ curBlackLength; in.SetIndex[pos]; RETURN [maxLength > alreadyFoldedLength AND maxBlackLength < maxAllowedBlackLength]; }; CopyFolded: PROC [in, out: IO.STREAM, breakRope: Rope.ROPE _ NIL, lineLength: INT _ foldLength, initialLength: INT _ 0] = { <> currentLength: INT _ initialLength; breakLength: INT _ breakRope.Length[]; whiteSpace, blackSpace: Rope.ROPE; WhiteSpaceProc: ArpaLex822.CharProc = { SELECT char FROM Ascii.SP, Ascii.TAB, Ascii.LF => RETURN [FALSE, TRUE]; ArpaMTTreeOps.discretionaryBlank => RETURN [FALSE, FALSE] ENDCASE => RETURN [TRUE, FALSE]; }; BlackSpaceProc: ArpaLex822.CharProc = { SELECT char FROM Ascii.SP, Ascii.TAB, Ascii.LF, Ascii.CR, ArpaMTTreeOps.discretionaryBlank => RETURN [TRUE, FALSE]; ENDCASE => RETURN [FALSE, TRUE]; }; DO ENABLE IO.EndOfStream => EXIT; IF in.PeekChar[] = Ascii.CR THEN { out.PutChar[in.GetChar[]]; [] _ in.PeekChar[]; -- Careful! Don't output the breakRope if we're out of input. IF breakLength > 0 THEN out.PutRope[breakRope]; currentLength _ breakLength; }; whiteSpace _ ArpaLex822.GetSequence[in, WhiteSpaceProc]; blackSpace _ ArpaLex822.GetSequence[in, BlackSpaceProc]; IF currentLength+whiteSpace.Length[]+blackSpace.Length[] > lineLength THEN { IF blackSpace.Length[] > 0 THEN { out.PutChar['\n]; IF breakLength > 0 THEN out.PutRope[breakRope]; out.PutRope[blackSpace]; currentLength _ breakLength+blackSpace.Length[]; }; <> } ELSE { out.PutRope[whiteSpace]; out.PutRope[blackSpace]; currentLength _ currentLength+whiteSpace.Length[]+blackSpace.Length[]; }; ENDLOOP; }; CopyNormal: PROC [in, out: IO.STREAM] ~ { <> buffer: REF TEXT = RefText.ObtainScratch[512]; DO IF in.GetBlock[buffer]=0 THEN EXIT; out.PutBlock[buffer]; ENDLOOP; RefText.ReleaseScratch[buffer]; }; SetDefaultDomain: PROC [info: Info] = { hl: LIST OF REF ANY _ info.headers; fromDomainType, senderDomainType: ArpaMTTreeOps.DomainType _ unknown; fromDomain, senderDomain: REF ANY _ NIL; WHILE hl # NIL DO hr: REF HeaderRec _ NARROW[hl.first]; IF hr.fieldType = nameField THEN { IF Rope.Equal[hr.name, "sender", FALSE] THEN [senderDomainType, senderDomain] _ ArpaMTTreeOps.DefaultDomain[hr.body] ELSE IF Rope.Equal[hr.name, "from", FALSE] THEN [fromDomainType, fromDomain] _ ArpaMTTreeOps.DefaultDomain[hr.body] }; hl _ hl.rest; ENDLOOP; SELECT TRUE FROM senderDomainType # unknown => { info.defaultDomainType _ senderDomainType; info.defaultDomain _ senderDomain; }; fromDomainType # unknown => { info.defaultDomainType _ fromDomainType; info.defaultDomain _ fromDomain;} ENDCASE => { info.defaultDomainType _ unknown; info.defaultDomain _ NIL; }; }; ParseHeaders: PUBLIC ENTRY PROC [file, errStream: IO.STREAM] RETURNS [info: Info] = { ENABLE UNWIND => NULL; headerName, token, whiteSpace, token2: Rope.ROPE; headerNameOk: BOOL; Error: PROC [errorMsg: Rope.ROPE] = { errStream.PutF["Error in parse of %g: %g.\n", IO.rope[headerName], IO.rope[errorMsg]]; }; ParseField: PROC [hr: REF HeaderRec, Input: P1.ScannerProc, what: Rope.ROPE] RETURNS[error: BOOL _ FALSE]~ { streamIndex: INT _ file.GetIndex[]; badSemantics: BOOL _ FALSE; nTokens, nErrors: INT; [nErrors: nErrors, nTokens: nTokens] _ P1.Parse[file, Input]; IF nErrors = 0 THEN [hr.body, badSemantics] _ P1.GetEvalResult[]; IF nErrors > 0 OR badSemantics THEN { Error[Rope.Cat["incorrect ", what]]; error _ TRUE; hr.fieldType _ textField; file.SetIndex[streamIndex]; hr.body _ ArpaLex822.LexText[file]; }; }; info _ NEW[InfoRec]; info.headers _ NIL; info.foldLines _ TRUE; DO hr: REF HeaderRec; [headerName, headerNameOk] _ ArpaLex822.LexFieldName[file]; IF headerNameOk AND headerName.IsEmpty[] THEN EXIT; IF headerNameOk THEN [token, whiteSpace, ] _ ArpaLex822.LexToken[file] ELSE { token _ whiteSpace _ NIL; Error["Illegal field name"]; }; IF ~headerNameOk OR ~token.Equal[":"] THEN { IF headerNameOk THEN Error["Field name not followed by colon"]; token2 _ ArpaLex822.LexText[file]; hr _ NEW[HeaderRec _ [ name: NIL, -- Empty name field means don't use colon to print it. fieldType: textField, body: Rope.Cat[headerName, whiteSpace, token, token2]]]; } ELSE { <> hr _ NEW[HeaderRec _ [name~headerName, fieldType~TRASH]]; SELECT TRUE FROM headerName.Equal["date", FALSE] => { hr.fieldType _ dateField; [] _ ParseField[hr, P1.ScanDate, "date field"] }; headerName.Equal["cc", FALSE] => { ccRope: Rope.ROPE; error: BOOL _ FALSE; hr.fieldType _ nameField; error _ ParseField[hr, P1.ScanName, "name list"]; IF hr = NIL THEN LOOP; IF hr.body = NIL THEN LOOP; IF ~error THEN { ccRope _ ArpaMTTreeOps.TreeToSimpleRope[hr.body]; IF ccRope = NIL THEN LOOP; IF Rope.Equal[ccRope, "\n"] THEN LOOP; -- cc fields are not allowed to be empty }; }; IsNameFieldHeader[headerName] => { hr.fieldType _ nameField; [] _ ParseField[hr, P1.ScanName, "name list"] }; ENDCASE => { r: Rope.ROPE _ ArpaLex822.LexText[file]; hr.fieldType _ textField; hr.body _ r; <> IF hr.name.Equal[lineFold, FALSE] AND r.Find[s2: "no", case: FALSE]>=0 THEN info.foldLines _ FALSE; }; }; IF hr # NIL THEN info.headers _ List.Nconc1[info.headers, hr]; ENDLOOP; SetDefaultDomain[info]; }; PrintHeaders: PUBLIC PROC [info: Info, out: IO.STREAM, fold: BOOL _ FALSE] = { <> hl: LIST OF REF ANY _ info.headers; WHILE hl # NIL DO hr: REF HeaderRec _ NARROW[hl.first]; body: Rope.ROPE; spareStream: IO.STREAM; IF ~hr.name.IsEmpty[] THEN out.Put[IO.rope[hr.name], IO.rope[":"]]; IF hr.fieldType = nameField OR hr.fieldType = dateField THEN body _ ArpaMTTreeOps.TreeToRope[tree: hr.body, insertDiscretionaryBlanks: fold] ELSE body _ NARROW[hr.body]; IF fold THEN { spareStream _ IO.RIS[body, spareStream]; CopyFolded[in: spareStream, out: out, breakRope: " ", initialLength: hr.name.Length+1]; } ELSE out.PutRope[body]; hl _ hl.rest; ENDLOOP; }; xeroxDomain: REF ANY = ArpaMTTreeOps.XeroxDomain[]; TranslateToArpa: PUBLIC PROC [info: Info] = { hl: LIST OF REF ANY _ info.headers; msgId: BOOL _ FALSE; FOR hl: LIST OF REF ANY _ info.headers, hl.rest UNTIL hl = NIL DO hr: REF HeaderRec _ NARROW[hl.first]; IF Rope.Equal[hr.name, msgID, FALSE] THEN msgId _ TRUE; IF hr.fieldType = nameField THEN IF info.defaultDomainType = registry THEN hr.body _ ArpaMTTreeOps.ToArpa[hr.body, info.defaultDomainType, info.defaultDomain] ELSE <> <> hr.body _ ArpaMTTreeOps.ToArpa[hr.body, arpaDomain, xeroxDomain]; ENDLOOP; IF ~msgId AND info.id # NIL THEN { hr: REF HeaderRec _ NEW[HeaderRec]; hr^ _ [ name: msgID, fieldType: textField, body: IO.PutFR[" <%G@%G>\n", [rope[info.id]], [rope["Xerox"]]] ]; info.headers _ List.Nconc1[info.headers, hr]; }; }; TranslateToGrapevine: PUBLIC PROC [info: Info] = { hl: LIST OF REF ANY _ info.headers; WHILE hl # NIL DO hr: REF HeaderRec _ NARROW[hl.first]; IF hr.fieldType = nameField THEN hr.body _ ArpaMTTreeOps.ToGrapevine[hr.body, info.defaultDomainType, info.defaultDomain]; hl _ hl.rest; ENDLOOP; }; TranslateMessage: PUBLIC PROC [in, out, error: IO.STREAM, direction: ArpaMT.Direction, id: ROPE] = { <> info: Info _ ParseHeaders[in, error]; fold: BOOL; info.id _ id; IF direction = toArpa THEN TranslateToArpa[info] ELSE TranslateToGrapevine[info]; PrintHeaders[info, out, direction = toArpa]; fold _ direction = toArpa AND info.foldLines AND Fold[in]; IF fold THEN CopyFolded[in, out] ELSE CopyNormal[in, out]; }; CheckFromField: PUBLIC PROC [info: Info] RETURNS [ok: BOOL _ TRUE] = { fromFound: BOOL _ FALSE; FOR hl: LIST OF REF ANY _ info.headers, hl.rest UNTIL hl = NIL DO hr: REF HeaderRec _ NARROW[hl.first]; IF hr.fieldType = nameField AND (Rope.Equal[hr.name, "From", FALSE] OR Rope.Equal[hr.name, "Sender", FALSE]) THEN { rope: Rope.ROPE _ ArpaMTTreeOps.TreeToSimpleRope[hr.body]; IF FunnyCharacters[rope] THEN ok _ FALSE; IF Rope.Equal[hr.name, "From", FALSE] THEN fromFound _ TRUE; } ENDLOOP; IF ~fromFound THEN ok _ FALSE; }; FunnyCharacters: PROC [rope: Rope.ROPE] RETURNS [funny: BOOL] = BEGIN <> length: INT _ Rope.Length[rope]; IF length = 0 THEN RETURN[TRUE]; FOR i: INT IN [0..length) DO SELECT Rope.Fetch[rope, i] FROM > 177C => EXIT; -- Funny characters. What should happen to these?? '@, ', => NULL; -- @, and comma '(, '), '<, '>, '< => NULL; ';, '\\, '", '[, '] => NULL; -- Rest of Specials EXCEPT PERIOD! ': => EXIT; -- Stop NS names from going through ' => NULL; -- Space <<>> '\n => IF i # length-1 THEN EXIT; -- Trailing CR is OK < 040C => EXIT; -- CTL ENDCASE => NULL; -- Includes underbar REPEAT FINISHED => RETURN[FALSE]; -- Nothing fancy inside the string ENDLOOP; RETURN[TRUE]; END; InitParser[]; END.