<> <> <> <> <> DIRECTORY Ascii USING [TAB, CR, SP, DEL], IO USING [Backup, EndOf, EndOfStream, GetChar, PeekChar, STREAM], Lex822, RefText USING [InlineAppendChar, ObtainScratch, ReleaseScratch], Rope USING [Cat, FromRefText, IsEmpty, ROPE]; Lex822Impl: CEDAR PROGRAM IMPORTS IO, RefText, Rope EXPORTS Lex822 = BEGIN OPEN Lex822; CharType: TYPE = {ctlType, crType, lwspType, specialType, atomType, illegalType}; charType: ARRAY CHAR OF CharType; InternalGetToken: PROC [s: IO.STREAM] RETURNS [token: Rope.ROPE, tokType: TokenType] = { <> state: {start, inWhite, inAtom, inDelimited, slurpOne} _ start; nestable: BOOL; nestingLevel: INT _ 0; openingDelim, closingDelim: CHAR; TokenProc: CharProc = { SELECT state FROM start => SELECT charType[char] FROM crType => IF NOT s.EndOf[] AND charType[s.PeekChar[]] = lwspType THEN { tokType _ whiteSpaceTok; state _ inWhite; } ELSE { tokType _ EOLTok; RETURN [TRUE, TRUE] }; lwspType => {tokType _ whiteSpaceTok; state _ inWhite}; specialType => SELECT char FROM '( => { state _ inDelimited; nestable _ TRUE; nestingLevel _ 1; openingDelim _ char; closingDelim _ '); tokType _ commentTok; }; '[ => { state _ inDelimited; nestable _ FALSE; openingDelim _ char; closingDelim _ ']; tokType _ domainLiteralTok; }; '" => { state _ inDelimited; nestable _ FALSE; openingDelim _ char; closingDelim _ '"; tokType _ quotedStringTok; }; ENDCASE => {tokType _ specialTok; RETURN [TRUE, TRUE]}; atomType => {tokType _ atomTok; state _ inAtom}; ENDCASE => {tokType _ errorTok; RETURN [TRUE, TRUE]}; inWhite => { IF char = Ascii.CR THEN { IF s.EndOf[] OR charType[s.PeekChar[]] # lwspType THEN RETURN [TRUE, FALSE]; } ELSE RETURN[charType[char] # lwspType, charType[char] = lwspType]; }; inAtom => RETURN[charType[char] # atomType, charType[char] = atomType]; inDelimited => { IF char = '\\ THEN state _ slurpOne ELSE IF char = Ascii.CR THEN { IF s.EndOf[] OR charType[s.PeekChar[]] # lwspType THEN { tokType _ errorTok; -- token ends too soon RETURN [TRUE, FALSE]; }; } ELSE IF char = closingDelim THEN { IF nestable THEN nestingLevel _ nestingLevel - 1; IF nestingLevel <= 0 THEN quit _ TRUE; } ELSE IF char = openingDelim THEN IF nestable THEN nestingLevel _ nestingLevel + 1 ELSE {tokType _ errorTok; RETURN [TRUE, FALSE]} -- tried to nest illegally }; slurpOne => state _ inDelimited; ENDCASE => ERROR; }; -- of TokenProc tokType _ errorTok; token _ GetSequence[s, TokenProc]; -- sets tokType as side effect }; LexToken: PUBLIC PROC [s: IO.STREAM] RETURNS [token, whiteSpace: Rope.ROPE, tokType: TokenType] = { <> whiteSpace _ NIL; DO [token, tokType] _ InternalGetToken[s]; SELECT tokType FROM whiteSpaceTok => whiteSpace _ whiteSpace.Cat[" "]; commentTok => whiteSpace _ whiteSpace.Cat[token]; ENDCASE => RETURN; ENDLOOP; }; LexFieldName: PUBLIC PROC [s: IO.STREAM] RETURNS [fieldName: Rope.ROPE, fieldNameOk: BOOL] = { FieldProc: CharProc = { IF char = ': THEN RETURN [TRUE, FALSE]; SELECT charType[char] FROM ctlType, lwspType, crType => RETURN [TRUE, FALSE]; ENDCASE => RETURN [FALSE, TRUE]; }; fieldName _ GetSequence[s, FieldProc]; IF fieldName.IsEmpty THEN fieldNameOk _ s.EndOf[] OR charType[s.PeekChar[]] = crType ELSE fieldNameOk _ TRUE; }; LexText: PUBLIC PROC [s: IO.STREAM] RETURNS [text: Rope.ROPE] = { <> lastWasCR: BOOL _ FALSE; TextProc: CharProc = { IF lastWasCR AND charType[char] # lwspType THEN { <> quit _ TRUE; include _ FALSE; } ELSE { <> quit _ FALSE; include _ TRUE; }; lastWasCR _ char = Ascii.CR; }; RETURN [GetSequence[s, TextProc]]; }; GetSequence: PUBLIC PROC [stream: IO.STREAM, charProc: CharProc] RETURNS [value: Rope.ROPE] = { buffer: REF TEXT _ RefText.ObtainScratch[512]; buffer.length _ 0; DO char: CHAR _ stream.GetChar[ ! IO.EndOfStream => EXIT]; quit, include: BOOLEAN; [quit, include] _ charProc[char]; IF include THEN buffer _ RefText.InlineAppendChar[buffer, char] ELSE IF quit THEN stream.Backup[char]; IF quit THEN EXIT; ENDLOOP; value _ Rope.FromRefText[buffer]; RefText.ReleaseScratch[buffer]; }; FOR c: CHAR IN [0C..37C] DO charType[c] _ ctlType; ENDLOOP; FOR c: CHAR IN [41C..176C] DO charType[c] _ atomType; ENDLOOP; charType[Ascii.CR] _ crType; charType[Ascii.TAB] _ lwspType; charType[Ascii.SP] _ lwspType; charType['"] _ specialType; charType['\\] _ specialType; charType['(] _ specialType; charType[')] _ specialType; charType['[] _ specialType; charType[']] _ specialType; charType['<] _ specialType; charType['>] _ specialType; charType['@] _ specialType; charType['.] _ specialType; charType[',] _ specialType; charType[':] _ specialType; charType[';] _ specialType; charType[Ascii.DEL] _ ctlType; FOR c: CHAR IN [200C..377C] DO charType[c] _ illegalType; ENDLOOP; END.