-- File: MailParserA.mesa
-- Last edited by Levin: 29-Mar-82 9:16:14
-- Willie-Sue: June 28, 1982 11:06 am
DIRECTORY
Ascii USING [CR, LF, SP, TAB],
Inline USING [BITAND],
MailParse USING [
endOfInput, endOfList, maxRecipientLength, NameInfo, NameType, ParseErrorCode],
MailParsePrivate,
Storage USING [FreePages, FreeString, Node, String],
String USING [AppendChar, LowerCase, StringBoundsFault];
MailParserA: PROGRAM
IMPORTS Inline, MailParsePrivate, Storage, String
EXPORTS MailParse, MailParsePrivate =
BEGIN OPEN Ascii, MailParse, MailParsePrivate;
-- Miscellaneous --
ImpossibleString: ERROR = CODE;
-- Exported Types, Procedures, and Signals --
ParseHandle: PUBLIC TYPE = MailParsePrivate.ParseHandle;
ParseError: PUBLIC ERROR [code: ParseErrorCode] = CODE;
GetFieldName: PUBLIC PROCEDURE [pH: ParseHandle, fieldNameOut: STRING]
RETURNS [found: BOOLEAN] =
BEGIN
ch: CHARACTER;
truncated: BOOLEAN ← FALSE;
fieldNameOut.length ← 0;
DO
SELECT ch ← GetNaked[pH] FROM
CR, endOfInput =>
IF fieldNameOut.length = 0 THEN RETURN[FALSE] ELSE EXIT;
': =>
IF truncated AND pH.signalTruncation THEN ERROR ParseError[truncated]
ELSE RETURN[TRUE];
endOfList, IN [0C..10C], IN [12C..37C] => EXIT;
ENDCASE =>
IF fieldNameOut.length = fieldNameOut.maxlength THEN truncated ← TRUE
ELSE String.AppendChar[fieldNameOut, ch];
ENDLOOP;
ERROR ParseError[badFieldName];
END;
GetFieldBody: PUBLIC PROCEDURE [
pH: ParseHandle, fieldBodyOut: STRING, suppressWhiteSpace: BOOLEAN ← FALSE] =
BEGIN
ch: CHARACTER;
truncated: BOOLEAN ← FALSE;
spaceSeen: BOOLEAN ← TRUE; -- TRUE means ignore leading spaces
RemoveTrailingSpace: PROCEDURE = INLINE
BEGIN
WHILE fieldBodyOut.length > 0 AND fieldBodyOut[fieldBodyOut.length - 1] = SP DO
fieldBodyOut.length ← fieldBodyOut.length - 1;
ENDLOOP;
END;
fieldBodyOut.length ← 0;
IF fieldBodyOut.maxlength ~= 0 THEN
BEGIN
DO
IF (ch ← GetNaked[pH]) = CR THEN ch ← CheckForFolding[pH];
SELECT ch FROM
SP, TAB =>
BEGIN
IF spaceSeen THEN LOOP;
IF suppressWhiteSpace THEN {ch ← SP; spaceSeen ← TRUE};
END;
endOfInput => GO TO Trouble;
endOfList, CR => EXIT;
ENDCASE => spaceSeen ← FALSE;
IF fieldBodyOut.length = fieldBodyOut.maxlength THEN {truncated ← TRUE; EXIT};
fieldBodyOut[fieldBodyOut.length] ← ch;
fieldBodyOut.length ← fieldBodyOut.length + 1;
ENDLOOP;
RemoveTrailingSpace[];
IF ~truncated THEN RETURN;
END;
DO -- faster loop for discarding
IF (ch ← GetNaked[pH]) = CR THEN ch ← CheckForFolding[pH];
SELECT ch FROM
CR, endOfList => EXIT;
endOfInput => GO TO Trouble;
ENDCASE => truncated ← TRUE;
ENDLOOP;
IF truncated AND pH.signalTruncation THEN ERROR ParseError[truncated];
EXITS
Trouble => ERROR ParseError[badFieldBody];
END;
SyntaxError: PUBLIC ERROR = CODE;
ParseNameList: PUBLIC PROCEDURE [
pH: ParseHandle,
process: PROCEDURE [STRING, STRING, STRING, NameInfo] RETURNS [BOOLEAN],
write: PROCEDURE [CHARACTER] ← NIL, suppressWhiteSpace: BOOLEAN ← FALSE] =
BEGIN
error: BOOLEAN ← FALSE;
CleanUp: PROCEDURE =
BEGIN OPEN pH;
DiscardLexemeList[pH, tagLexList]; tagLexList ← NIL;
ResetCharacterStorage[pH];
ResetLexicalStorage[pH];
cantFinalize ← recordingChars ← FALSE;
END;
--main body of ParseNameList
pH.write ← write;
pH.process ← process;
pH.suppressWhiteSpace ← suppressWhiteSpace;
InitializeLexicalStorage[pH];
pH.cantFinalize ← pH.recordingChars ← TRUE;
pH.nameOutput ← pH.scanningTag ← FALSE;
pH.nameInfo ← [nesting: none, hasTag: FALSE, type: normal];
IF pH.simpleName = NIL THEN pH.simpleName ← Storage.String[maxRecipientLength];
IF pH.registry = NIL THEN pH.registry ← Storage.String[maxRecipientLength];
IF pH.arpaHost = NIL THEN pH.arpaHost ← Storage.String[maxRecipientLength];
ParseList[pH, endOfInput !
SyntaxError, EndOfInput => {error ← TRUE; CONTINUE};
UNWIND => CleanUp[]];
CleanUp[];
IF error THEN ERROR ParseError[badFieldBody];
END;
-- Support procedures for ParseNameList --
ParseList: PUBLIC PROCEDURE [pH: ParseHandle, terminator: CHARACTER] =
BEGIN OPEN pH;
MSGdl: ERROR = CODE;
outputThisLevel ← FALSE;
DO -- the somewhat obscure coding of this (inner) loop is for speed
lexType: LexemeType;
char: CHARACTER ← GetNaked[pH];
IF Inline.BITAND[char, 177B] <= 100C THEN
BEGIN
IF char = CR THEN char ← CheckForFolding[pH];
SELECT char FROM
SP, TAB => GO TO LexemePart;
nameSeparator, CR, endOfList, endOfInput =>
BEGIN
ProcessName[pH];
IF terminator = '; AND ~nameSeen THEN {backUp[]; ERROR MSGdl};
IF char ~= nameSeparator THEN
IF nameInfo.nesting = none THEN EXIT ELSE GO TO Trouble;
END;
'@ =>
IF EmptyLexemeListOrSpace[pH] THEN AccumulateFileName[pH]
ELSE GO TO LexemePart;
'< => RecurForBrackets[pH, '<, '>];
': =>
IF AccumulateTagOrPrelude[pH] = prelude THEN
BEGIN
nameSeen ← terminator ~= endOfInput;
RecurForBrackets[pH, ':, '; ! MSGdl =>
BEGIN
IF terminator ~= endOfInput THEN GO TO Trouble;
BuildMSGdlAtom[pH]; -- do this before the UNWIND!
CONTINUE
END];
END;
'>, '; =>
BEGIN
IF char ~= terminator THEN GO TO Trouble;
ProcessName[pH];
nameSeen ← TRUE;
IF outputThisLevel AND write ~= NIL THEN write[char];
EXIT
END;
openComment => AccumulateComment[pH];
stringQuote => AccumulateQuotedString[pH];
closeComment => GO TO Trouble;
ENDCASE => IF char < 040C THEN GO TO Trouble ELSE GO TO LexemePart;
LOOP;
EXITS
LexemePart =>
lexType ←
SELECT char FROM
'. => dot,
'@ => at,
SP, TAB => space,
ENDCASE => atom;
END
ELSE lexType ← atom;
MaybeNewLexemeInline[pH, lexType];
WriteToStoreInline[pH, char];
ENDLOOP;
EXITS Trouble => ERROR SyntaxError;
END;
ProcessName: PROCEDURE [pH: ParseHandle] =
BEGIN OPEN pH;
ParseName: PROCEDURE RETURNS [BOOLEAN] = -- INLINE --
BEGIN
state: {initial, possibleHost, hostSeen, possibleRegistry} ← initial;
InsertQualifier: PROCEDURE [lex: Lexeme, type: LexemeType, s: STRING]
RETURNS [qL: Lexeme] =
BEGIN
qL ← MakeLexemeFromString[pH, type, s];
AddLexeme[pH, qL, lex];
AddLexeme[pH, MakeLexeme[pH, atom], lex];
END;
GetQualifier: PROCEDURE [lex: Lexeme, s: STRING] =
BEGIN
lex ← lex.next;
IF lex.type = space THEN lex ← lex.next;
AppendLexemeValue[pH, s, lex];
END;
AtomIsReallyAt: PROCEDURE [lex: Lexeme] RETURNS [BOOLEAN] = INLINE
BEGIN
RETURN[
lex.length = 2 AND lex.next.type = space AND lex.prev.type = space AND
String.LowerCase[ReadFromStore[pH, lex.start]] = 'a AND
String.LowerCase[ReadFromStore[pH, lex.start + 1]] = 't]
END;
IF LexemeCount[pH] = none THEN RETURN[FALSE];
nameSeen ← nameSeen OR ~EmptyLexemeListOrSpace[pH];
simpleName.length ← registry.length ← arpaHost.length ← 0;
FOR lex: Lexeme ← lexHead.prev, lex.prev UNTIL lex.type = head DO
IF lex.type = space THEN LOOP;
SELECT state FROM
initial => IF lex.type = atom THEN state ← possibleHost;
possibleHost =>
BEGIN
SELECT lex.type FROM
at => NULL;
dot => {dotLex ← lex; EXIT};
atom =>
IF AtomIsReallyAt[lex] THEN
BEGIN
lex.start ← lex.prev.start;
lex.length ← lex.length + lex.next.length + lex.prev.length;
lex.type ← at;
DeleteLexeme[pH, lex.prev];
DeleteLexeme[pH, lex.next];
END
ELSE EXIT;
ENDCASE => EXIT;
state ← hostSeen;
atLex ← lex;
END;
hostSeen => IF lex.type = atom THEN state ← possibleRegistry ELSE EXIT;
possibleRegistry => {IF lex.type = dot THEN dotLex ← lex; EXIT};
ENDCASE;
ENDLOOP;
IF atLex = NIL THEN atLex ← InsertQualifier[lexHead, at, " at "L]
ELSE GetQualifier[atLex, arpaHost];
IF dotLex = NIL THEN dotLex ← InsertQualifier[atLex, dot, "."L]
ELSE GetQualifier[dotLex, registry];
nameInfo.type ← AppendSublistToString[pH, simpleName, lexHead, dotLex];
nameInfo.hasTag ← tagLexList ~= NIL;
RETURN[simpleName.length ~= 0 OR registry.length ~= 0 OR arpaHost.length ~= 0]
END;
IF scanningTag THEN ERROR SyntaxError;
dotLex ← atLex ← NIL;
TerminateLexeme[pH];
IF ParseName[ ! String.StringBoundsFault --[s] RETURNS [ns]-- =>
BEGIN
ns ← Storage.String[s.maxlength + s.maxlength/2];
SELECT s FROM
simpleName => simpleName ← ns;
registry => registry ← ns;
arpaHost => arpaHost ← ns;
ENDCASE => ERROR ImpossibleString;
Storage.FreeString[s];
RETRY
END]
AND process[simpleName, registry, arpaHost, nameInfo] THEN
BEGIN
IF write ~= NIL THEN OutputName[pH];
nameOutput ← outputThisLevel ← TRUE;
END;
IF tagLexList ~= NIL THEN
BEGIN
TruncateCharacterStorage[pH, tagLexList.start];
DiscardLexemeList[pH, tagLexList];
tagLexList ← NIL;
END
ELSE
IF LexemeCount[pH] > none THEN TruncateCharacterStorage[pH, lexHead.next.start];
ResetLexicalStorage[pH];
END;
AppendSublistToString: PUBLIC PROCEDURE [
pH: ParseHandle, s: STRING, first, last: Lexeme]
RETURNS [nameType: NameType] =
BEGIN
lex: Lexeme ← first;
nameType ← normal;
DO
SELECT lex.type FROM
space, head => NULL;
ENDCASE =>
BEGIN
IF lex.prev.type = space AND s.length ~= 0 THEN
{String.AppendChar[s, SP]; nameType ← multiAtom};
AppendLexemeValue[pH, s, lex];
IF nameType ~= multiAtom THEN
SELECT s[0] FROM
stringQuote => nameType ← quotedString;
'@ => nameType ← file;
ENDCASE =>
SELECT s[s.length-1] FROM
': => nameType ← msgDL;
'↑ => nameType ← publicDL;
ENDCASE;
END;
IF (lex ← lex.next) = last THEN EXIT;
ENDLOOP;
END;
-- Input Scanner --
EndOfInput: PUBLIC SIGNAL = CODE;
Get: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [char: CHARACTER] =
-- obtains next input character and smoothes over a few lexical quirks. This
-- procedure deals with Arpa-standard line-folding, except that bare CR characters
-- are not permitted.
BEGIN OPEN pH;
SELECT char ← nextChar[] FROM
endOfList, endOfInput => SIGNAL EndOfInput;
CR => char ← CheckForFoldingInline[pH];
ENDCASE;
END;
CheckForFolding: PROCEDURE [pH: ParseHandle] RETURNS [char: CHARACTER] =
{RETURN[CheckForFoldingInline[pH]]};
CheckForFoldingInline: PROCEDURE [pH: ParseHandle] RETURNS [char: CHARACTER] = INLINE
-- This procedure deals with Arpa-standard line-folding, except that bare CR
-- characters are not permitted.
BEGIN OPEN pH;
lfSeen: BOOLEAN;
char ← nextChar[];
IF (lfSeen ← (char = LF)) THEN char ← nextChar[];
SELECT char FROM
SP, TAB =>
IF recordingChars THEN
BEGIN
MaybeNewLexeme[pH, space];
WriteToStore[pH, CR];
IF lfSeen THEN WriteToStore[pH, LF];
END;
ENDCASE => {char ← CR; backUp[]};
END;
-- Lexeme Storage --
ResetLexicalStorage: PUBLIC PROCEDURE [pH: ParseHandle] =
-- flushes the lexeme table without destroying the backing storage behind it.
{FlushLexemeList[pH]; pH.lexStart ← GetPosition[pH]; pH.curLexType ← null};
LexemeCount: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [Magnitude] =
BEGIN
RETURN[
SELECT pH.lexHead.next FROM
pH.lexHead => none,
pH.lexHead.prev => one,
ENDCASE => many]
END;
EmptyLexemeListOrSpace: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [BOOLEAN] =
BEGIN
TerminateLexeme[pH];
SELECT LexemeCount[pH] FROM
none => RETURN[TRUE];
one => RETURN[pH.lexHead.next.type = space]
ENDCASE;
RETURN[FALSE]
END;
MakeLexeme: PUBLIC PROCEDURE [
pH: ParseHandle, type: LexemeType, start: CharIndex ← 0, length: CARDINAL ← 0]
RETURNS [lex: Lexeme] =
-- bundles up the specified range of characters as a lexeme of the indicated type,
-- and returns it.
BEGIN
IF (lex ← pH.freeLexHead) = NIL THEN lex ← Storage.Node[SIZE[Lex]]
ELSE pH.freeLexHead ← lex.next;
lex↑ ← Lex[next: NIL, prev: NIL, type: type, length: length, start: start];
END;
MakeLexemeFromString: PUBLIC PROCEDURE [pH: ParseHandle, type: LexemeType, s: STRING]
RETURNS [lex: Lexeme] =
-- bundles up the given string as a lexeme of the indicated type and returns it.
BEGIN
lex ← MakeLexeme[pH: pH, type: type, start: GetPosition[pH], length: s.length];
FOR i: CARDINAL IN [0..s.length) DO WriteToStore[pH, s[i]] ENDLOOP;
END;
AppendNewLexeme: PROCEDURE [pH: ParseHandle, type: LexemeType] = INLINE
-- bundles up the current range of characters as a lexeme of the indicated type
-- and appends it to the lexeme list.
BEGIN
pos: CharIndex = GetPosition[pH];
start: CharIndex = pH.lexStart;
IF pos = start -- OR type = null -- THEN RETURN;
AddLexeme[pH, MakeLexeme[pH: pH, type: type, start: start, length: pos - start]];
pH.lexStart ← pos;
END;
MaybeNewLexemeInline: PROCEDURE [pH: ParseHandle, new: LexemeType] = INLINE
-- a character belonging to a lexeme of type 'new' has been found. This may extend
-- a previous lexeme or begin a new one. MaybeNewLexeme decides which case
-- applies and acts appropriately.
BEGIN
IF new = pH.curLexType THEN RETURN;
AppendNewLexeme[pH, pH.curLexType];
pH.curLexType ← new;
END;
MaybeNewLexeme: PUBLIC PROCEDURE [pH: ParseHandle, new: LexemeType] =
{MaybeNewLexemeInline[pH, new]};
AppendLexemeValue: PUBLIC PROCEDURE [pH: ParseHandle, s: STRING, lex: Lexeme] =
-- appends the contents of the lexeme 'lex' to the string 's'.
BEGIN
FOR i: CARDINAL IN [lex.start..lex.start + lex.length) DO
String.AppendChar[s, ReadFromStore[pH, i]];
ENDLOOP;
END;
RemoveLexemeList: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [head: Lexeme] =
-- removes all lexemes except 'lexHead' from the list, returning the new 'head'.
BEGIN
IF (head ← pH.lexHead.next) = pH.lexHead THEN RETURN[NIL];
head.prev ← pH.lexHead.prev;
pH.lexHead.prev.next ← head;
pH.lexHead.next ← pH.lexHead.prev ← pH.lexHead;
END;
DiscardLexemeList: PUBLIC PROCEDURE [pH: ParseHandle, head: Lexeme] =
-- releases all lexemes (including 'head').
BEGIN
IF head = NIL THEN RETURN;
head.prev.next ← pH.freeLexHead;
pH.freeLexHead ← head;
END;
AddLexeme: PUBLIC PROCEDURE [pH: ParseHandle, lex: Lexeme, where: Lexeme ← NIL] =
-- adds 'lex' to the lexeme list immediately before 'where'. If 'where' is
-- defaulted, the lexeme goes at the end of the list.
BEGIN
IF where = NIL THEN where ← pH.lexHead;
lex.next ← where;
lex.prev ← where.prev;
lex.prev.next ← where.prev ← lex;
END;
DeleteLexeme: PUBLIC PROCEDURE [pH: ParseHandle, lex: Lexeme] =
-- removes 'lex' from the lexeme list.
BEGIN
IF lex.next ~= NIL THEN {lex.prev.next ← lex.next; lex.next.prev ← lex.prev};
lex.next ← pH.freeLexHead;
pH.freeLexHead ← lex;
END;
-- Character Storage
TruncateCharacterStorage: PUBLIC PROCEDURE [pH: ParseHandle, killFrom: CharIndex] =
-- releases backing storage for buffered names.
BEGIN OPEN pH;
IF ~(killFrom = 0 AND bufferList.header.count = 0) THEN
EnsureIndexInCache[pH, IF killFrom = 0 THEN 0 ELSE killFrom - 1];
DeleteSuccessorBuffers[cache];
cache.header.count ← killFrom - cache.header.first;
IF preludeOut > killFrom THEN preludeOut ← killFrom;
END;
ReadFromStore: PUBLIC PROCEDURE [pH: ParseHandle, index: CharIndex]
RETURNS [CHARACTER] =
-- returns the character at position 'index' in backing storage.
BEGIN
EnsureIndexInCache[pH, index];
RETURN[pH.cache.chars[index - pH.cache.header.first]]
END;
WriteToStore: PUBLIC PROCEDURE [pH: ParseHandle, char: CHARACTER] =
{WriteToStoreInline[pH, char]};
WriteToStoreInline: PROCEDURE [pH: ParseHandle, char: CHARACTER] = INLINE
-- appends 'char' to backing storage.
BEGIN
b: CharBuffer ← pH.cache;
UNTIL b.header.next = NIL DO b ← b.header.next ENDLOOP;
IF b.header.count = maxCharsPerBuffer THEN {
b.header.next ← AddBuffer[pH, b.header.first+maxCharsPerBuffer, b];
b ← b.header.next};
b.chars[b.header.count] ← char;
b.header.count ← b.header.count + 1;
END;
GetPosition: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [CharIndex] =
-- returns the character index of the next free character position in
-- backing storage.
BEGIN
b: CharBuffer ← pH.cache;
UNTIL b.header.next = NIL DO b ← b.header.next ENDLOOP;
RETURN[b.header.first + b.header.count]
END;
EnsureIndexInCache: PROCEDURE [pH: ParseHandle, i: CharIndex] = INLINE
BEGIN
IF i NOT IN [pH.cache.header.first..pH.cache.header.first+pH.cache.header.count) THEN
LoadCache[pH, i];
END;
DeleteSuccessorBuffers: PUBLIC PROCEDURE [b: CharBuffer] =
BEGIN
tb: CharBuffer ← b.header.next;
b.header.next ← NIL;
UNTIL (b ← tb) = NIL DO
tb ← b.header.next;
Storage.FreePages[b];
ENDLOOP;
END;
END.
Edit Log:
Willie-Sue on June 28, 1982 11:07 am: ~IN => NOT IN (Cedar compiler change)