ArpaLex822Impl.mesa, Implementation of RFC 822 lexical analyzer.
HGM, March 8, 1984 10:08:21 pm PST
David Nichols, July 13, 1983 3:17 pm
Last Edited by: Taft, February 5, 1984 11:53:10 am PST
HGM, March 8, 1984 10:08:21 pm PST
John Larson, October 10, 1987 5:01:34 pm PDT
DIRECTORY
Ascii USING [TAB, CR, SP, DEL],
IO USING [Backup, EndOf, EndOfStream, GetChar, PeekChar, STREAM],
ArpaLex822,
RefText USING [InlineAppendChar, ObtainScratch, ReleaseScratch],
Rope USING [Cat, FromRefText, IsEmpty, ROPE];
ArpaLex822Impl:
CEDAR
PROGRAM
IMPORTS IO, RefText, Rope
EXPORTS ArpaLex822 =
BEGIN OPEN ArpaLex822;
CharType: TYPE = {ctlType, crType, lwspType, specialType, atomType, illegalType};
charType: ARRAY CHAR OF CharType;
InternalGetToken:
PROC [s: IO.
STREAM]
RETURNS [token: Rope.
ROPE, tokType: TokenType] = {
Get a token of any type from s, including white space and comments.
state: {start, inWhite, inAtom, inDelimited, slurpOne} ← start;
nestable: BOOL;
nestingLevel: INT ← 0;
openingDelim, closingDelim: CHAR;
TokenProc: CharProc = {
SELECT state
FROM
start =>
SELECT charType[char]
FROM
crType =>
IF
NOT s.EndOf[]
AND charType[s.PeekChar[]] = lwspType
THEN {
tokType ← whiteSpaceTok;
state ← inWhite;
}
ELSE {
tokType ← EOLTok;
RETURN [TRUE, TRUE]
};
lwspType => {tokType ← whiteSpaceTok; state ← inWhite};
specialType =>
SELECT char
FROM
'( => {
state ← inDelimited;
nestable ← TRUE;
nestingLevel ← 1;
openingDelim ← char;
closingDelim ← ');
tokType ← commentTok;
};
'[ => {
state ← inDelimited;
nestable ← FALSE;
openingDelim ← char;
closingDelim ← '];
tokType ← domainLiteralTok;
};
'" => {
state ← inDelimited;
nestable ← FALSE;
openingDelim ← char;
closingDelim ← '";
tokType ← quotedStringTok;
};
ENDCASE => {tokType ← specialTok; RETURN [TRUE, TRUE]};
atomType => {tokType ← atomTok; state ← inAtom};
ENDCASE => {tokType ← errorTok; RETURN [TRUE, TRUE]};
inWhite =>
{
IF char = Ascii.
CR
THEN {
IF s.EndOf[]
OR charType[s.PeekChar[]] # lwspType
THEN
RETURN [TRUE, FALSE];
}
ELSE
RETURN[charType[char] # lwspType, charType[char] = lwspType];
};
inAtom => RETURN[charType[char] # atomType, charType[char] = atomType];
inDelimited => {
IF char = '\\
THEN
state ← slurpOne
ELSE IF char = Ascii.
CR
THEN {
IF s.EndOf[]
OR charType[s.PeekChar[]] # lwspType
THEN {
tokType ← errorTok; -- token ends too soon
RETURN [TRUE, FALSE];
};
}
ELSE
IF char = closingDelim
THEN {
IF nestable THEN nestingLevel ← nestingLevel - 1;
IF nestingLevel <= 0 THEN quit ← TRUE;
}
ELSE
IF char = openingDelim
THEN
IF nestable THEN nestingLevel ← nestingLevel + 1
ELSE {tokType ← errorTok; RETURN [TRUE, FALSE]} -- tried to nest illegally
};
slurpOne => state ← inDelimited;
ENDCASE => ERROR;
}; -- of TokenProc
tokType ← errorTok;
token ← GetSequence[s, TokenProc]; -- sets tokType as side effect
};
LexToken:
PUBLIC
PROC [s:
IO.
STREAM]
RETURNS [token, whiteSpace: Rope.
ROPE, tokType: TokenType] = {
Get a normal token from s and return the text of the token, the white space and comments following the token, and the type of token that was found. Since the white space is returned separately, tokType will never be whiteSpaceTok or commentTok.
whiteSpace ← NIL;
DO
[token, tokType] ← InternalGetToken[s];
SELECT tokType
FROM
whiteSpaceTok => whiteSpace ← whiteSpace.Cat[" "];
commentTok => whiteSpace ← whiteSpace.Cat[token];
ENDCASE => RETURN;
ENDLOOP;
};
LexFieldName:
PUBLIC
PROC [s:
IO.
STREAM]
RETURNS [fieldName: Rope.
ROPE, fieldNameOk:
BOOL] = {
FieldProc: CharProc = {
IF char = ': THEN RETURN [TRUE, FALSE];
SELECT charType[char]
FROM
ctlType, lwspType, crType => RETURN [TRUE, FALSE];
ENDCASE => RETURN [FALSE, TRUE]; };
fieldName ← GetSequence[s, FieldProc];
IF fieldName.IsEmpty THEN fieldNameOk ← s.EndOf[] OR charType[s.PeekChar[]] = crType
ELSE fieldNameOk ← TRUE;
};
LexText:
PUBLIC
PROC [s:
IO.
STREAM]
RETURNS [text: Rope.
ROPE] = {
Return the contents of a field as text, i.e. return the text that follows up to a newline not followed by white space.
lastWasCR: BOOL ← FALSE;
TextProc: CharProc = {
IF lastWasCR
AND charType[char] # lwspType
THEN {
This is first char of next header.
quit ← TRUE;
include ← FALSE;
}
ELSE {
Use the char.
quit ← FALSE;
include ← TRUE;
};
lastWasCR ← char = Ascii.CR;
};
RETURN [GetSequence[s, TextProc]];
};
GetSequence:
PUBLIC PROC [stream: IO.
STREAM, charProc: CharProc]
RETURNS [value: Rope.
ROPE] = {
buffer: REF TEXT ← RefText.ObtainScratch[512];
buffer.length ← 0;
DO
char: CHAR ← stream.GetChar[ ! IO.EndOfStream => EXIT];
quit, include: BOOLEAN;
[quit, include] ← charProc[char];
IF include THEN buffer ← RefText.InlineAppendChar[buffer, char]
ELSE IF quit THEN stream.Backup[char];
IF quit THEN EXIT;
ENDLOOP;
value ← Rope.FromRefText[buffer];
RefText.ReleaseScratch[buffer];
};
FOR c:
CHAR
IN [0C..37C]
DO
charType[c] ← ctlType;
ENDLOOP;
FOR c:
CHAR
IN [41C..176C]
DO
charType[c] ← atomType;
ENDLOOP;
charType[Ascii.CR] ← crType;
charType[Ascii.TAB] ← lwspType;
charType[Ascii.SP] ← lwspType;
charType['"] ← specialType;
charType['\\] ← specialType;
charType['(] ← specialType;
charType[')] ← specialType;
charType['[] ← specialType;
charType[']] ← specialType;
charType['<] ← specialType;
charType['>] ← specialType;
charType['@] ← specialType;
charType['.] ← specialType;
charType[',] ← specialType;
charType[':] ← specialType;
charType[';] ← specialType;
charType[Ascii.DEL] ← ctlType;
FOR c:
CHAR
IN [200C..377C]
DO
charType[c] ← illegalType;
ENDLOOP;
END.