ArpaMTMain.mesa
Last Edited by: Nichols, July 19, 1983 1:54 pm
Last Edited by: Taft, February 5, 1984 1:13:05 pm PST
Hal Murray May 27, 1985 8:06:02 pm PDT
John Larson, April 20, 1988 11:51:37 am PDT
DIRECTORY
ArpaLex822 USING [CharProc, GetSequence, LexFieldName, LexText, LexToken],
ArpaMT USING[Direction],
ArpaMTMiscOps USING [Lookup, Table],
ArpaMTP1 USING [GetEvalResult, InstallParseTable, Parse, ScannerProc, ScanDate, ScanName],
ArpaMTParseData,
ArpaMTTreeOps: TYPE USING [DefaultDomain, discretionaryBlank, DomainType, TreeToRope, TreeToSimpleRope, ToArpa, ToGrapevine, XeroxDomain],
Ascii USING [CR, LF, SP, TAB],
IO USING [EndOf, EndOfStream, GetBlock, GetChar, GetIndex, PeekChar, Put, PutBlock, PutChar, PutF, PutFR, PutRope, RIS, rope, SetIndex, STREAM],
List USING [Nconc1],
RefText USING [ObtainScratch, ReleaseScratch],
Rope USING [Cat, Equal, Fetch, Find, IsEmpty, Length, ROPE],
PrincOpsUtils USING [Codebase];
ArpaMTMain: CEDAR MONITOR -- Parser has lots of global data
IMPORTS
ArpaLex822, ArpaMTMiscOps, ArpaMTP1, ArpaMTParseData, ArpaMTTreeOps, IO, List, PrincOpsUtils, RefText, Rope
EXPORTS ArpaMT =
BEGIN OPEN P1: ArpaMTP1;
ROPE: TYPE = Rope.ROPE;
This table holds the names of headers in a message that should be parsed as name fields.
nameFields: ArpaMTMiscOps.Table ~ LIST["from", "sender", "to", "cc", "bcc", "reply-to", "resent-from", "resent-sender", "resent-to", "resent-cc", "resent-bcc", "resent-reply-to", "remailed-from", "remailed-sender", "remailed-to", "remailed-cc", "remailed-bcc"];
Other interesting header filed names.
msgID: ROPE = "Message-ID";
lineFold: ROPE = "Line-Fold";
The variable that holds the parsed header and its type.
HeaderRec: TYPE = RECORD[
name: Rope.ROPE,
fieldType: {textField, dateField, nameField},
body: REF ANY]; -- Rope for text and whatever ArpaMTP1.GetEvalResult returns for others
Info: TYPE = REF InfoRec;
InfoRec: PUBLIC TYPE = RECORD [
id: ROPENIL,
foldLines: BOOL, -- TRUE if we should fold lines for this message.
defaultDomainType: ArpaMTTreeOps.DomainType, -- defaults for translate routines
defaultDomain: REF ANY,
headers: LIST OF REF ANY ]; -- really LIST OF REF HeaderRec, but I need the List ops
Line folding stuff.
foldLength: INT ← 80; -- How long to make folded lines.
alreadyFoldedLength: INT ← 90; -- Max a line can be and still appear to be folded.
foldedLookahead: INT ← 5000; -- Look at this many chars to decide if folded.
maxAllowedBlackLength: INT ← 50; -- Probably data. Don't fold. (Line fold code crashes on some data files)
InitParser: PROC = TRUSTED {
Initializes the parser for parsing name fields.
P1.InstallParseTable[LOOPHOLE[PrincOpsUtils.Codebase[LOOPHOLE[ArpaMTParseData]]]]; };
IsNameFieldHeader: PROC [fieldName: Rope.ROPE] RETURNS [BOOL] = {
Returns TRUE if fieldName is in nameFields, the list of headers that contain names.
RETURN [ArpaMTMiscOps.Lookup[fieldName, nameFields]];
};
Fold: PROC [in: IO.STREAM] RETURNS [fold: BOOL] = {
Look at the first part of in and decide whether to fold.
pos: INT ← in.GetIndex[];
curLength, curBlackLength, maxLength, maxBlackLength: INT ← 0;
count: INT ← 0;
WHILE count < foldedLookahead AND ~in.EndOf[] DO
char: CHAR ← in.GetChar[];
IF char = Ascii.CR THEN {
IF curLength > maxLength THEN maxLength ← curLength;
IF curBlackLength > maxBlackLength THEN maxBlackLength ← curBlackLength;
curLength ← 0;
curBlackLength ← 0}
ELSE {
curLength ← curLength+1;
IF char = Ascii.SP OR char = Ascii.TAB THEN {
IF curBlackLength > maxBlackLength THEN maxBlackLength ← curBlackLength;
curBlackLength ← 0; }
ELSE curBlackLength ← curBlackLength+1;
};
count ← count+1;
ENDLOOP;
IF curLength > maxLength THEN maxLength ← curLength;
IF curBlackLength > maxBlackLength THEN maxBlackLength ← curBlackLength;
in.SetIndex[pos];
RETURN [maxLength > alreadyFoldedLength AND maxBlackLength < maxAllowedBlackLength]; };
CopyFolded: PROC [in, out: IO.STREAM, breakRope: Rope.ROPENIL, lineLength: INT ← foldLength, initialLength: INT ← 0] = {
Fold the the text from in appending the breakRope to the newline that is used to break. lineLength controls how long the lines are, and initialLength tells how much text is on the current line when CopyFolded is first called.
currentLength: INT ← initialLength;
breakLength: INT ← breakRope.Length[];
whiteSpace, blackSpace: Rope.ROPE;
WhiteSpaceProc: ArpaLex822.CharProc = {
SELECT char FROM
Ascii.SP, Ascii.TAB, Ascii.LF => RETURN [FALSE, TRUE];
ArpaMTTreeOps.discretionaryBlank => RETURN [FALSE, FALSE]
ENDCASE => RETURN [TRUE, FALSE]; };
BlackSpaceProc: ArpaLex822.CharProc = {
SELECT char FROM
Ascii.SP, Ascii.TAB, Ascii.LF, Ascii.CR, ArpaMTTreeOps.discretionaryBlank =>
RETURN [TRUE, FALSE];
ENDCASE => RETURN [FALSE, TRUE]; };
DO
ENABLE IO.EndOfStream => EXIT;
IF in.PeekChar[] = Ascii.CR THEN {
out.PutChar[in.GetChar[]];
[] ← in.PeekChar[]; -- Careful! Don't output the breakRope if we're out of input.
IF breakLength > 0 THEN
out.PutRope[breakRope];
currentLength ← breakLength;
};
whiteSpace ← ArpaLex822.GetSequence[in, WhiteSpaceProc];
blackSpace ← ArpaLex822.GetSequence[in, BlackSpaceProc];
IF currentLength+whiteSpace.Length[]+blackSpace.Length[] > lineLength THEN {
IF blackSpace.Length[] > 0 THEN {
out.PutChar['\n];
IF breakLength > 0 THEN
out.PutRope[breakRope];
out.PutRope[blackSpace];
currentLength ← breakLength+blackSpace.Length[];
};
else we must be at EOF or a newline and we might as well just discard the whiteSpace.
}
ELSE {
out.PutRope[whiteSpace];
out.PutRope[blackSpace];
currentLength ← currentLength+whiteSpace.Length[]+blackSpace.Length[];
};
ENDLOOP;
};
CopyNormal: PROC [in, out: IO.STREAM] ~ {
Copy in to out.
buffer: REF TEXT = RefText.ObtainScratch[512];
DO
IF in.GetBlock[buffer]=0 THEN EXIT;
out.PutBlock[buffer];
ENDLOOP;
RefText.ReleaseScratch[buffer]; };
SetDefaultDomain: PROC [info: Info] = {
hl: LIST OF REF ANY ← info.headers;
fromDomainType, senderDomainType: ArpaMTTreeOps.DomainType ← unknown;
fromDomain, senderDomain: REF ANYNIL;
WHILE hl # NIL DO
hr: REF HeaderRec ← NARROW[hl.first];
IF hr.fieldType = nameField THEN {
IF Rope.Equal[hr.name, "sender", FALSE] THEN
[senderDomainType, senderDomain] ← ArpaMTTreeOps.DefaultDomain[hr.body]
ELSE IF Rope.Equal[hr.name, "from", FALSE] THEN
[fromDomainType, fromDomain] ← ArpaMTTreeOps.DefaultDomain[hr.body] };
hl ← hl.rest;
ENDLOOP;
SELECT TRUE FROM
senderDomainType # unknown => {
info.defaultDomainType ← senderDomainType;
info.defaultDomain ← senderDomain; };
fromDomainType # unknown => {
info.defaultDomainType ← fromDomainType;
info.defaultDomain ← fromDomain;}
ENDCASE => {
info.defaultDomainType ← unknown;
info.defaultDomain ← NIL; }; };
ParseHeaders: PUBLIC ENTRY PROC [file, errStream: IO.STREAM] RETURNS [info: Info] = {
ENABLE UNWIND => NULL;
headerName, token, whiteSpace, token2: Rope.ROPE;
headerNameOk: BOOL;
Error: PROC [errorMsg: Rope.ROPE] = {
errStream.PutF["Error in parse of %g: %g.\n", IO.rope[headerName], IO.rope[errorMsg]]; };
ParseField: PROC [hr: REF HeaderRec, Input: P1.ScannerProc, what: Rope.ROPE] RETURNS[error: BOOLFALSE]~ {
streamIndex: INT ← file.GetIndex[];
badSemantics: BOOLFALSE;
nTokens, nErrors: INT;
[nErrors: nErrors, nTokens: nTokens] ← P1.Parse[file, Input];
IF nErrors = 0 THEN
[hr.body, badSemantics] ← P1.GetEvalResult[];
IF nErrors > 0 OR badSemantics THEN {
Error[Rope.Cat["incorrect ", what]];
error ← TRUE;
hr.fieldType ← textField;
file.SetIndex[streamIndex];
hr.body ← ArpaLex822.LexText[file]; }; };
info ← NEW[InfoRec];
info.headers ← NIL;
info.foldLines ← TRUE;
DO
hr: REF HeaderRec;
[headerName, headerNameOk] ← ArpaLex822.LexFieldName[file];
IF headerNameOk AND headerName.IsEmpty[] THEN EXIT;
IF headerNameOk THEN [token, whiteSpace, ] ← ArpaLex822.LexToken[file]
ELSE { token ← whiteSpace ← NIL; Error["Illegal field name"]; };
IF ~headerNameOk OR ~token.Equal[":"] THEN {
IF headerNameOk THEN Error["Field name not followed by colon"];
token2 ← ArpaLex822.LexText[file];
hr ← NEW[HeaderRec ← [
name: NIL, -- Empty name field means don't use colon to print it.
fieldType: textField,
body: Rope.Cat[headerName, whiteSpace, token, token2]]]; }
ELSE {
Everything's wonderful, handle this header.
hr ← NEW[HeaderRec ← [name~headerName, fieldType~TRASH]];
SELECT TRUE FROM
headerName.Equal["date", FALSE] => {
hr.fieldType ← dateField;
[] ← ParseField[hr, P1.ScanDate, "date field"] };
headerName.Equal["cc", FALSE] => {
ccRope: Rope.ROPE;
error: BOOLFALSE;
hr.fieldType ← nameField;
error ← ParseField[hr, P1.ScanName, "name list"];
IF hr = NIL THEN LOOP;
IF hr.body = NIL THEN LOOP;
IF ~error THEN {
ccRope ← ArpaMTTreeOps.TreeToSimpleRope[hr.body];
IF ccRope = NIL THEN LOOP;
IF Rope.Equal[ccRope, "\n"] THEN LOOP; -- cc fields are not allowed to be empty
};
};
IsNameFieldHeader[headerName] => {
hr.fieldType ← nameField;
[] ← ParseField[hr, P1.ScanName, "name list"] };
ENDCASE => {
r: Rope.ROPE ← ArpaLex822.LexText[file];
hr.fieldType ← textField;
hr.body ← r;
Is this test too slow?
IF hr.name.Equal[lineFold, FALSE] AND r.Find[s2: "no", case: FALSE]>=0 THEN
info.foldLines ← FALSE; }; };
IF hr # NIL THEN info.headers ← List.Nconc1[info.headers, hr];
ENDLOOP;
SetDefaultDomain[info]; };
PrintHeaders: PUBLIC PROC [info: Info, out: IO.STREAM, fold: BOOLFALSE] = {
Print the list of headers that ParseHeaders produced. This can also be called after calling TranslateToArpa or TranslateToGrapevine.
hl: LIST OF REF ANY ← info.headers;
WHILE hl # NIL DO
hr: REF HeaderRec ← NARROW[hl.first];
body: Rope.ROPE;
spareStream: IO.STREAM;
IF ~hr.name.IsEmpty[] THEN
out.Put[IO.rope[hr.name], IO.rope[":"]];
IF hr.fieldType = nameField OR hr.fieldType = dateField THEN
body ← ArpaMTTreeOps.TreeToRope[tree: hr.body, insertDiscretionaryBlanks: fold]
ELSE body ← NARROW[hr.body];
IF fold THEN {
spareStream ← IO.RIS[body, spareStream];
CopyFolded[in: spareStream, out: out, breakRope: " ", initialLength: hr.name.Length+1]; }
ELSE out.PutRope[body];
hl ← hl.rest;
ENDLOOP; };
xeroxDomain: REF ANY = ArpaMTTreeOps.XeroxDomain[];
TranslateToArpa: PUBLIC PROC [info: Info] = {
hl: LIST OF REF ANY ← info.headers;
msgId: BOOLFALSE;
FOR hl: LIST OF REF ANY ← info.headers, hl.rest UNTIL hl = NIL DO
hr: REF HeaderRec ← NARROW[hl.first];
IF Rope.Equal[hr.name, msgID, FALSE] THEN msgId ← TRUE;
IF hr.fieldType = nameField THEN
IF info.defaultDomainType = registry THEN
hr.body ← ArpaMTTreeOps.ToArpa[hr.body, info.defaultDomainType, info.defaultDomain]
ELSE
Msg from ARPA land gets redistributed to ARPA via a GV DL.
Info extracted from From/Sender field is bogus.
hr.body ← ArpaMTTreeOps.ToArpa[hr.body, arpaDomain, xeroxDomain];
ENDLOOP;
IF ~msgId AND info.id # NIL THEN {
hr: REF HeaderRec ← NEW[HeaderRec];
hr^ ← [
name: msgID,
fieldType: textField,
body: IO.PutFR[" <%G@%G>\n", [rope[info.id]], [rope["Xerox"]]] ];
info.headers ← List.Nconc1[info.headers, hr]; }; };
TranslateToGrapevine: PUBLIC PROC [info: Info] = {
hl: LIST OF REF ANY ← info.headers;
WHILE hl # NIL DO
hr: REF HeaderRec ← NARROW[hl.first];
IF hr.fieldType = nameField THEN
hr.body ← ArpaMTTreeOps.ToGrapevine[hr.body, info.defaultDomainType, info.defaultDomain];
hl ← hl.rest;
ENDLOOP; };
TranslateMessage: PUBLIC PROC [in, out, error: IO.STREAM, direction: ArpaMT.Direction, id: ROPE] = {
Translate a message present on in to out printing error messages on error.
info: Info ← ParseHeaders[in, error];
fold: BOOL;
info.id ← id;
IF direction = toArpa THEN TranslateToArpa[info] ELSE TranslateToGrapevine[info];
PrintHeaders[info, out, direction = toArpa];
fold ← direction = toArpa AND info.foldLines AND Fold[in];
IF fold THEN CopyFolded[in, out] ELSE CopyNormal[in, out]; };
CheckFromField: PUBLIC PROC [info: Info] RETURNS [ok: BOOLTRUE] = {
fromFound: BOOLFALSE;
FOR hl: LIST OF REF ANY ← info.headers, hl.rest UNTIL hl = NIL DO
hr: REF HeaderRec ← NARROW[hl.first];
IF hr.fieldType = nameField AND (Rope.Equal[hr.name, "From", FALSE] OR Rope.Equal[hr.name, "Sender", FALSE]) THEN {
rope: Rope.ROPE ← ArpaMTTreeOps.TreeToSimpleRope[hr.body];
IF FunnyCharacters[rope] THEN ok ← FALSE;
IF Rope.Equal[hr.name, "From", FALSE] THEN fromFound ← TRUE; }
ENDLOOP;
IF ~fromFound THEN ok ← FALSE; };
FunnyCharacters: PROC [rope: Rope.ROPE] RETURNS [funny: BOOL] =
BEGIN
See MaybeAddQuotes in SMTPSyntaxImpl
length: INT ← Rope.Length[rope];
IF length = 0 THEN RETURN[TRUE];
FOR i: INT IN [0..length) DO
SELECT Rope.Fetch[rope, i] FROM
> 177C => EXIT; -- Funny characters. What should happen to these??
'@, ', => NULL; -- @, and comma
'(, '), '<, '>, '< => NULL;
';, '\\, '", '[, '] => NULL; -- Rest of Specials EXCEPT PERIOD!
': => EXIT; -- Stop NS names from going through
' => NULL; -- Space
'\n => IF i # length-1 THEN EXIT; -- Trailing CR is OK
< 040C => EXIT; -- CTL
ENDCASE => NULL; -- Includes underbar
REPEAT FINISHED => RETURN[FALSE]; -- Nothing fancy inside the string
ENDLOOP;
RETURN[TRUE];
END;
InitParser[];
END.