Lex822Impl.mesa, Implementation of RFC 822 lexical analyzer.
HGM, March 8, 1984 10:08:21 pm PST
David Nichols, July 13, 1983 3:17 pm
Last Edited by: Taft, February 5, 1984 11:53:10 am PST
John Larson, March 9, 1987 6:01:34 pm PST
DIRECTORY
Ascii USING [TAB, CR, SP, DEL],
IO USING [Backup, EndOf, EndOfStream, GetChar, PeekChar, STREAM],
Lex822,
RefText USING [InlineAppendChar, ObtainScratch, ReleaseScratch],
Rope USING [Cat, FromRefText, IsEmpty, ROPE];
Lex822Impl: CEDAR PROGRAM
IMPORTS IO, RefText, Rope
EXPORTS Lex822 =
BEGIN OPEN Lex822;
CharType: TYPE = {ctlType, crType, lwspType, specialType, atomType, illegalType};
charType: ARRAY CHAR OF CharType;
InternalGetToken: PROC [s: IO.STREAM] RETURNS [token: Rope.ROPE, tokType: TokenType] = {
Get a token of any type from s, including white space and comments.
state: {start, inWhite, inAtom, inDelimited, slurpOne} ← start;
nestable: BOOL;
nestingLevel: INT ← 0;
openingDelim, closingDelim: CHAR;
TokenProc: CharProc = {
SELECT state FROM
start => SELECT charType[char] FROM
crType =>
IF NOT s.EndOf[] AND charType[s.PeekChar[]] = lwspType THEN {
tokType ← whiteSpaceTok;
state ← inWhite;
}
ELSE {
tokType ← EOLTok;
RETURN [TRUE, TRUE]
};
lwspType => {tokType ← whiteSpaceTok; state ← inWhite};
specialType => SELECT char FROM
'( => {
state ← inDelimited;
nestable ← TRUE;
nestingLevel ← 1;
openingDelim ← char;
closingDelim ← ');
tokType ← commentTok;
};
'[ => {
state ← inDelimited;
nestable ← FALSE;
openingDelim ← char;
closingDelim ← '];
tokType ← domainLiteralTok;
};
'" => {
state ← inDelimited;
nestable ← FALSE;
openingDelim ← char;
closingDelim ← '";
tokType ← quotedStringTok;
};
ENDCASE => {tokType ← specialTok; RETURN [TRUE, TRUE]};
atomType => {tokType ← atomTok; state ← inAtom};
ENDCASE => {tokType ← errorTok; RETURN [TRUE, TRUE]};
inWhite => {
IF char = Ascii.CR THEN {
IF s.EndOf[] OR charType[s.PeekChar[]] # lwspType THEN
RETURN [TRUE, FALSE];
}
ELSE
RETURN[charType[char] # lwspType, charType[char] = lwspType];
};
inAtom => RETURN[charType[char] # atomType, charType[char] = atomType];
inDelimited => {
IF char = '\\ THEN
state ← slurpOne
ELSE IF char = Ascii.CR THEN {
IF s.EndOf[] OR charType[s.PeekChar[]] # lwspType THEN {
tokType ← errorTok; -- token ends too soon
RETURN [TRUE, FALSE];
};
}
ELSE IF char = closingDelim THEN {
IF nestable THEN nestingLevel ← nestingLevel - 1;
IF nestingLevel <= 0 THEN quit ← TRUE;
}
ELSE IF char = openingDelim THEN
IF nestable THEN nestingLevel ← nestingLevel + 1
ELSE {tokType ← errorTok; RETURN [TRUE, FALSE]} -- tried to nest illegally
};
slurpOne => state ← inDelimited;
ENDCASE => ERROR;
}; -- of TokenProc
tokType ← errorTok;
token ← GetSequence[s, TokenProc]; -- sets tokType as side effect
};
LexToken: PUBLIC PROC [s: IO.STREAM] RETURNS [token, whiteSpace: Rope.ROPE, tokType: TokenType] = {
Get a normal token from s and return the text of the token, the white space and comments following the token, and the type of token that was found. Since the white space is returned separately, tokType will never be whiteSpaceTok or commentTok.
whiteSpace ← NIL;
DO
[token, tokType] ← InternalGetToken[s];
SELECT tokType FROM
whiteSpaceTok => whiteSpace ← whiteSpace.Cat[" "];
commentTok => whiteSpace ← whiteSpace.Cat[token];
ENDCASE => RETURN;
ENDLOOP;
};
LexFieldName: PUBLIC PROC [s: IO.STREAM] RETURNS [fieldName: Rope.ROPE, fieldNameOk: BOOL] = {
FieldProc: CharProc = {
IF char = ': THEN RETURN [TRUE, FALSE];
SELECT charType[char] FROM
ctlType, lwspType, crType => RETURN [TRUE, FALSE];
ENDCASE => RETURN [FALSE, TRUE]; };
fieldName ← GetSequence[s, FieldProc];
IF fieldName.IsEmpty THEN fieldNameOk ← s.EndOf[] OR charType[s.PeekChar[]] = crType
ELSE fieldNameOk ← TRUE;
};
LexText: PUBLIC PROC [s: IO.STREAM] RETURNS [text: Rope.ROPE] = {
Return the contents of a field as text, i.e. return the text that follows up to a newline not followed by white space.
lastWasCR: BOOLFALSE;
TextProc: CharProc = {
IF lastWasCR AND charType[char] # lwspType THEN {
This is first char of next header.
quit ← TRUE;
include ← FALSE;
}
ELSE {
Use the char.
quit ← FALSE;
include ← TRUE;
};
lastWasCR ← char = Ascii.CR;
};
RETURN [GetSequence[s, TextProc]];
};
GetSequence: PUBLIC PROC [stream: IO.STREAM, charProc: CharProc] RETURNS [value: Rope.ROPE] = {
buffer: REF TEXT ← RefText.ObtainScratch[512];
buffer.length ← 0;
DO
char: CHAR ← stream.GetChar[ ! IO.EndOfStream => EXIT];
quit, include: BOOLEAN;
[quit, include] ← charProc[char];
IF include THEN buffer ← RefText.InlineAppendChar[buffer, char]
ELSE IF quit THEN stream.Backup[char];
IF quit THEN EXIT;
ENDLOOP;
value ← Rope.FromRefText[buffer];
RefText.ReleaseScratch[buffer];
};
FOR c: CHAR IN [0C..37C] DO
charType[c] ← ctlType;
ENDLOOP;
FOR c: CHAR IN [41C..176C] DO
charType[c] ← atomType;
ENDLOOP;
charType[Ascii.CR] ← crType;
charType[Ascii.TAB] ← lwspType;
charType[Ascii.SP] ← lwspType;
charType['"] ← specialType;
charType['\\] ← specialType;
charType['(] ← specialType;
charType[')] ← specialType;
charType['[] ← specialType;
charType[']] ← specialType;
charType['<] ← specialType;
charType['>] ← specialType;
charType['@] ← specialType;
charType['.] ← specialType;
charType[',] ← specialType;
charType[':] ← specialType;
charType[';] ← specialType;
charType[Ascii.DEL] ← ctlType;
FOR c: CHAR IN [200C..377C] DO
charType[c] ← illegalType;
ENDLOOP;
END.