[Cyan]<Cedar6.0>IO>IODateAndTimeImpl.mesa!2

IODateAndTimeImpl.mesa

Levin on January 16, 1984 12:44 pm PST

MBrown on September 20, 1983 10:39 pm

Russ Atkinson (RRA) February 2, 1985 1:17:13 pm PST

DIRECTORY

Ascii USING [SP, TAB, Upper],

BasicTime USING [DayOfWeek, hoursPerDay, minutesPerHour, MonthOfYear, secondsPerMinute, Unpacked],

IO USING [Backup, EndOf, Error, GetChar, STREAM],

RefText USING [AppendChar, ObtainScratch, ReleaseScratch, TrustTextAsRope],

Rope USING [Equal, ROPE, Substr];

IODateAndTimeImpl: CEDAR PROGRAM

IMPORTS Ascii, IO, RefText, Rope

EXPORTS IO

= BEGIN

hoursPerDay: NAT = BasicTime.hoursPerDay;

minutesPerHour: NAT = BasicTime.minutesPerHour;

secondsPerMinute: NAT = BasicTime.secondsPerMinute;

secondsPerDay: INT = hoursPerDay.LONG*minutesPerHour*secondsPerMinute;

baseYear: NAT = 1968;

EpochDays: TYPE = INT[0..LAST[LONG CARDINAL]/secondsPerDay];

The following computation isn't exactly correct, since it doesn't account for the absence of leap days in century years not divisible by 400. As it happens, the actual values for baseYear and LAST[EpochDays] are such that there is at most one such day, and it doesn't affect the arithmetic.

Year: TYPE = NAT[baseYear..MIN[baseYear+(LAST[EpochDays]/(4*365+1))*4, 2036--BasicTime's limit--]];

Month: TYPE = NAT[1..12];

MonthOfYear: TYPE = BasicTime.MonthOfYear[January..December];

Day: TYPE = NAT[1..31];

Hour: TYPE = NAT[0..hoursPerDay);

Minute: TYPE = NAT[0..minutesPerHour);

Second: TYPE = NAT[0..secondsPerMinute);

DeltaMinutes: TYPE = INTEGER[-hoursPerDay*minutesPerHour..hoursPerDay*minutesPerHour];

defined as GMT minus local mean time

ZoneIndex: TYPE = INT [-12..12];

NAZones: TYPE = ZoneIndex[4..11];

ROPE: TYPE = Rope.ROPE;

initialTokens: NAT = 20;

initialBackingChars: NAT = 100;

TokenCount: TYPE = NAT;

TokenIndex: TYPE = NAT[TokenCount.FIRST..TokenCount.LAST);

CharIndex: TYPE = NAT;

CharCount: TYPE = NAT[0..37777B]; -- this upper bound saves a word in Token

Input: TYPE = RECORD [

nTokens: TokenCount,

scratch: REF TEXT,

backing: REF TEXT,

tokens: SEQUENCE length: TokenCount OF Token

];

Token: TYPE = RECORD [

offset: CharIndex,

kind: SELECT type: * FROM

alpha => [length: CharCount],

num => [length: CharCount],

sep => [char: CHAR],

ENDCASE

];

sentinel: CHAR = 200C;

GetUnpackedTime: PUBLIC PROC [s: IO.STREAM] RETURNS [time: BasicTime.Unpacked ← []] = {

Unintelligible: ERROR [pos: NAT] = CODE;

input: REF Input;

***************

* Scanner

***************

The tokens built up in the input sequence (input.tokens) satisfy the following properties:

input.tokens[0] is a sentinel separator

two separators never appear consecutively

a sentinel separator delimits the end of interesting input

InitializeInput: PROC = {

input ← NEW[Input[initialTokens]];

input.scratch ← RefText.ObtainScratch[initialBackingChars];

input.backing ← RefText.AppendChar[input.scratch, sentinel];

input.nTokens ← 0;

AddToken[[0, sep[sentinel]]];

};

FinalizeInput: PROC = {RefText.ReleaseScratch[input.scratch]};

state: {initial, num, alpha, eof} ← $initial;

GetToken: PROC [ti: TokenIndex] RETURNS [Token] = {

UNTIL input.nTokens > ti DO

IF state = $eof THEN RETURN [input.tokens[input.nTokens-1]];

ReadMoreTokens[];

ENDLOOP;

RETURN[input.tokens[ti]]

};

ReadMoreTokens: PROC = {

i: NAT;

GetChar: PROC RETURNS [char: CHAR] = {

char ← IF s.EndOf[] THEN sentinel ELSE s.GetChar[];

i ← input.backing.length;

input.backing ← RefText.AppendChar[input.backing, char];

};

Backup: PROC = {

s.Backup[input.backing[i ← input.backing.length.PRED]];

input.backing.length ← i;

};

tStart: NAT;

IF state ~= $initial THEN ERROR;

char: CHAR ← GetChar[];

SELECT char FROM

IN ['0..'9] =>

SELECT state FROM

$initial => {state ← $num; tStart ← i};

$num => NULL;

$alpha => {AddToken[[tStart, alpha[i-tStart]]]; Backup[]; EXIT};

ENDCASE;

IN ['A..'Z], IN ['a..'z] =>

SELECT state FROM

$initial => {state ← $alpha; tStart ← i};

$num => {AddToken[[tStart, num[i-tStart]]]; Backup[]; EXIT};

$alpha => NULL;

ENDCASE;

ENDCASE => {

SELECT char FROM

',, '., ':, '/, '-, '+, Ascii.SP => NULL;

Ascii.TAB => char ← Ascii.SP;

ENDCASE => char ← sentinel;

SELECT state FROM

$initial =>

WITH t: input.tokens[input.nTokens-1] SELECT FROM

sep => {

The effect of the following is to ignore spaces as separators unless there is nothing else, and to terminate the scan if two separator characters are adjacent.

IF t.char = Ascii.SP THEN t.char ← char

ELSE IF char ~= Ascii.SP THEN --illegal-- t.char ← char ← sentinel;

IF char = sentinel THEN {state ← $eof; EXIT};

};

ENDCASE => {

AddToken[[i, sep[char]]];

IF char = sentinel THEN state ← $eof;

EXIT

};

$num => {AddToken[[tStart, num[i-tStart]]]; AddToken[[i, sep[char]]]; EXIT};

$alpha => {AddToken[[tStart, alpha[i-tStart]]]; AddToken[[i, sep[char]]]; EXIT};

ENDCASE;

};

ENDLOOP;

IF state ~= $eof THEN state ← $initial;

};

AddToken: PROC [t: Token] = {

The TRUSTED statements in this procedure are actually SAFE, but the compiler is too conservative in its treatment of variant records. Our Tokens contain no REFs, so we should be able to manipulate them freely without compromising the integrity of storage. Unfortunately, the compiler doesn't understand that.

IF input.nTokens = input.length THEN {

newInput: REF Input ← NEW[Input[(input.length*3)/2]];

newInput.nTokens ← input.nTokens;

newInput.backing ← input.backing;

FOR i: NAT IN [0..input.nTokens) DO

TRUSTED{newInput.tokens[i] ← input.tokens[i]};

ENDLOOP;

input ← newInput;

}

ELSE {TRUSTED{input.tokens[input.nTokens] ← t}; input.nTokens ← input.nTokens.SUCC};

};

BackupStream: PROC [to: CharIndex] = {

FOR i: CharIndex DECREASING IN [to..input.backing.length) DO

char: CHAR = input.backing[i];

IF char ~= sentinel THEN s.Backup[char];

ENDLOOP;

};

***************

* Weekday Parser

***************

ParseWeekday: PROC [first: TokenIndex] RETURNS [next: TokenIndex] = {

next ← first;

WITH GetToken[first] SELECT FROM

t: alpha Token => {

DayOfWeek: TYPE = BasicTime.DayOfWeek[Monday..Sunday];

weekdays: ARRAY DayOfWeek OF ROPE = [

"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"];

a: ROPE =

Rope.Substr[base: RefText.TrustTextAsRope[input.backing], start: t.offset, len: t.length];

IF t.length >= 2 THEN

FOR i: DayOfWeek IN DayOfWeek DO

IF a.Equal[s2: weekdays[i].Substr[len: t.length], case: FALSE] THEN

RETURN[ConsumeSPOrComma[first.SUCC]];

ENDLOOP;

};

ENDCASE;

};

***************

* Date Parser

***************

ParseDate: PROC [first: TokenIndex] RETURNS [next: TokenIndex] = {

ENABLE OutOfRange => ERROR Unintelligible[pos];

BogusMonth: ERROR = CODE;

ParseSymbolicMonth: PROC [t: alpha Token] RETURNS [month: MonthOfYear] = {

MonthNames: TYPE = ARRAY MonthOfYear OF ROPE;

m: ROPE =

Rope.Substr[base: RefText.TrustTextAsRope[input.backing], start: t.offset, len: t.length];

SELECT Ascii.Upper[input.backing[t.offset]] FROM

'I, 'V, 'X => {

roman: MonthNames ← [

"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X", "XI", "XII"];

FOR month IN MonthOfYear DO

IF m.Equal[s2: roman[month], case: FALSE] THEN RETURN;

ENDLOOP;

};

ENDCASE => {

english: MonthNames ← [

"January", "February", "March", "April", "May", "June",

"July", "August", "September", "October", "November", "December"];

IF t.length >= 3 THEN

FOR month IN MonthOfYear DO

IF m.Equal[s2: english[month].Substr[len: t.length], case: FALSE] THEN RETURN;

ENDLOOP;

};

ERROR BogusMonth

};

CollectYear: PROC [y: num Token] RETURNS [Year] = {

This assumes Year.FIRST is in the 20th century

RETURN[1900+CollectValue[y, Year.FIRST-1900, 99 ! OutOfRange => CONTINUE]];

RETURN[CollectValue[y, Year.FIRST, Year.LAST]]

};

CollectMonth: PROC [m: num Token] RETURNS [MonthOfYear] = INLINE

{RETURN[VAL[CollectValue[m, Month.FIRST, Month.LAST]-Month.FIRST]]};

daysForMonth: ARRAY MonthOfYear OF NAT = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];

CollectDay: PROC [d: num Token, m: MonthOfYear] RETURNS [Day] = INLINE

{RETURN[CollectValue[d, Day.FIRST, daysForMonth[m]]]};

second, third: TokenIndex;

t: num Token;

IF GetToken[first].type = sep THEN Bogus[first];

Assert: 'first' is alpha or num.

WITH GetToken[second𡤏irst.SUCC] SELECT FROM

sep: sep Token => IF sep.char = sentinel THEN Bogus[second] ELSE second ← second.SUCC;

ENDCASE;

Assert: 'second' is alpha or num. (Reasoning: if first.SUCC was a separator other than the final sentinel, then it must be followed by at least one non-separator token, since two separators cannot be adjacent, and the token sequence always ends with a sentinel, which is a separator.

WITH GetToken[third←second.SUCC] SELECT FROM

sep: sep Token => IF sep.char = sentinel THEN Bogus[third] ELSE third ← third.SUCC;

ENDCASE;

Assert: 'third' is alpha or num. (Reasoning is similar to that for 'second'.) In all legal cases, 'third' must be numeric, so we check for that now.

WITH GetToken[third] SELECT FROM

y: num Token => t ← y;

ENDCASE => Bogus[third];

WITH GetToken[first] SELECT FROM

m: alpha Token => {

time.year ← CollectYear[t];

time.month ← ParseSymbolicMonth[m ! BogusMonth => Bogus[first]];

'first' corresponds to a valid month. We now require that 'second' be the day of the month and 'third' be the year.

WITH GetToken[second] SELECT FROM

d: num Token => time.day ← CollectDay[d, time.month];

ENDCASE => Bogus[second];

};

f: num Token =>

WITH GetToken[second] SELECT FROM

m: alpha Token => {

The second token is alpha, so we will require it to be the month.

time.month ← ParseSymbolicMonth[m ! BogusMonth => Bogus[second]];

Now we must decide if we have <day><month><year> or <year><month><day>.

time.day ← CollectDay[f, time.month ! OutOfRange => GO TO yearFirst];

time.year ← CollectYear[t];

EXITS

yearFirst => {time.year ← CollectYear[f]; time.day ← CollectDay[t, time.month]};

};

s: num Token => {

All three parts are numeric.

time.year ← CollectYear[t ! OutOfRange => GO TO yearFirst];

time.month ← CollectMonth[f ! OutOfRange => GO TO dayFirst];

time.day ← CollectDay[s, time.month];

EXITS

dayFirst => {

time.month ← CollectMonth[s];

time.day ← CollectDay[f, time.month];

};

yearFirst => {

time.year ← CollectYear[f];

time.month ← CollectMonth[s];

time.day ← CollectDay[t, time.month];

};

ENDCASE;

next ← third.SUCC;

Be sure we weren't slipped a bogus leap year...

IF time.month = February AND time.day = 29 AND ~LeapYear[time.year] THEN Bogus[next];

};

***************

* Time Parser

***************

ParseTime: PROC [first: TokenIndex] RETURNS [next: TokenIndex] = {

ENABLE OutOfRange => ERROR Unintelligible[pos];

CollectHour: PROC [h: num Token] RETURNS [Hour] = INLINE

{RETURN[CollectValue[h, Hour.FIRST, Hour.LAST]]};

CollectMinute: PROC [m: num Token] RETURNS [Minute] = INLINE

{RETURN[CollectValue[m, Minute.FIRST, Minute.LAST]]};

CollectSecond: PROC [s: num Token] RETURNS [Second] = INLINE

{RETURN[CollectValue[s, Second.FIRST, Second.LAST]]};

AMorPM: PROC [t: TokenIndex] RETURNS [BOOL] = {

WITH GetToken[t] SELECT FROM

m: alpha Token =>

IF m.length = 2 AND Ascii.Upper[input.backing[m.offset.SUCC]] = 'M THEN {

offset: Hour;

SELECT Ascii.Upper[input.backing[m.offset]] FROM

'A => offset ← 0;

'P => offset ← 12;

ENDCASE => RETURN[FALSE];

IF ~(time.hour IN [1..12]) THEN Bogus[t];

time.hour ← (time.hour MOD 12) + offset;

RETURN[TRUE]

};

ENDCASE;

RETURN[FALSE]

};

n: num Token;

next ← first;

time.hour ← time.minute ← time.second ← 0;

WITH GetToken[first] SELECT FROM

f: num Token => n ← f;

f: sep Token => IF f.char = sentinel THEN --time omitted-- RETURN ELSE Bogus[first];

ENDCASE => Bogus[first];

next ← next.SUCC;

SELECT n.length FROM

1, 2, 4, 6 => NULL;

ENDCASE => Bogus[first];

time.hour ← CollectHour[[n.offset, num[MIN[n.length, 2]]]];

IF n.length <= 2 THEN {

A separator must follow a 1- or 2- digit hour field.

WITH GetToken[next] SELECT FROM

sep: sep Token => IF sep.char = sentinel THEN Bogus[next];

ENDCASE => Bogus[next];

Assert: 'next' is a non-sentinel separator. Therefore, 'next.SUCC' is alpha or num.

next ← next.SUCC;

WITH GetToken[next] SELECT FROM

s: num Token => n ← s;

ENDCASE => Bogus[next];

SELECT n.length FROM

2, 4 => next ← next.SUCC;

ENDCASE => Bogus[next];

}

ELSE {n.offset ← n.offset + 2; n.length ← n.length - 2};

time.minute ← CollectMinute[[n.offset, num[2]]];

Assert: 'next' indexes the token following the minutes.

IF n.length > 2 THEN

The minutes and seconds are concatenated.

time.second ← CollectSecond[[n.offset+2, num[2]]]

ELSE

Now look for optional seconds field. We assume it is present if there is a colon separator following the minutes.

WITH GetToken[next] SELECT FROM

sep: sep Token =>

IF sep.char = ': THEN

Assert: 'next.SUCC' is alpha or num.

WITH GetToken[next.SUCC] SELECT FROM

s: num Token => {time.second ← CollectSecond[s]; next ← next + 2};

ENDCASE => Bogus[next.SUCC];

ENDCASE;

Assert: 'next' indexes the token following the last time part (minutes or seconds).

WITH GetToken[next] SELECT FROM

s: sep Token => IF s.char ~= sentinel AND AMorPM[next.SUCC] THEN next ← next + 2;

a: alpha Token => IF AMorPM[next] THEN next ← next.SUCC;

ENDCASE;

};

***************

* Zone Parser

***************

ParseZone: PROC [first: TokenIndex] RETURNS [next: TokenIndex] = {

ENABLE OutOfRange => ERROR Unintelligible[pos];

BadZone: ERROR = CODE;

dst: BOOL ← FALSE;

CollectSymbolic: PROC [z: alpha Token] RETURNS [DeltaMinutes] = {

char: CHAR = Ascii.Upper[input.backing[z.offset]];

SELECT z.length FROM

1 => {

zones: PACKED ARRAY ZoneIndex OF CHAR = [

'Y, 'X, 'W, 'V, 'U, 'T, 'S, 'R, 'Q, 'P, 'O, 'N,

'Z,

'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'K, 'L, 'M

];

FOR hour: ZoneIndex IN ZoneIndex DO

IF char = zones[hour] THEN RETURN[hour*minutesPerHour];

ENDLOOP;

};

2 => IF char = 'U AND Ascii.Upper[input.backing[z.offset+1]] = 'T THEN RETURN[0];

3 => {

naZones: PACKED ARRAY NAZones OF CHAR = ['A, 'E, 'C, 'M, 'P, 'Y, 'H, 'B];

IF Ascii.Upper[input.backing[z.offset+2]] = 'T THEN {

SELECT Ascii.Upper[input.backing[z.offset.SUCC]] FROM

'S, 'M => NULL; -- treat "mean" same as "standard"

'D => dst ← TRUE;

ENDCASE;

SELECT char FROM

'G => IF ~dst THEN RETURN[0];

'N => IF ~dst THEN RETURN[3*minutesPerHour+30];

ENDCASE =>

FOR hour: NAZones IN NAZones DO

IF char = naZones[hour] THEN RETURN[hour*minutesPerHour];

ENDLOOP;

};

ENDCASE;

ERROR BadZone

};

CollectAbsolute: PROC [z: num Token] RETURNS [DeltaMinutes] = {

hour: Hour;

hourLength: NAT;

minute: Minute ← 0;

IF ~(z.length IN [1..4]) THEN ERROR OutOfRange[z.offset+z.length];

hourLength ← 2 - (z.length MOD 2);

hour ← CollectValue[[z.offset, num[hourLength]], Hour.FIRST, Hour.LAST];

IF z.length > 2 THEN

minute ← CollectValue[[z.offset+hourLength, num[2]], Minute.FIRST, Minute.LAST];

RETURN[hour*minutesPerHour+minute]

};

WITH GetToken[next ← first] SELECT FROM

z: sep Token =>

SELECT z.char FROM

Ascii.SP, '+, '- => {

WITH GetToken[next.SUCC] SELECT FROM

zone: num Token =>

SELECT z.char FROM

Ascii.SP => GO TO noZone; -- numeric must be preceded by + or -

'+ => time.zone ← CollectAbsolute[zone];

'- => time.zone ← -CollectAbsolute[zone];

ENDCASE; -- can't happen

zone: alpha Token =>

IF z.char = '+ THEN Bogus[next.SUCC]

ELSE

time.zone ← CollectSymbolic[zone ! BadZone =>

IF z.char = Ascii.SP THEN GO TO noZone ELSE Bogus[next.SUCC]];

ENDCASE; -- can't happen

next ← next.SUCC;

};

ENDCASE => GO TO noZone; -- includes sentinel

z: alpha Token => time.zone ← CollectSymbolic[z ! BadZone => GO TO noZone];

ENDCASE => GO TO noZone;

next ← next.SUCC;

time.dst ← IF dst THEN yes ELSE no;

EXITS

noZone => NULL;

};

***************

* Utilities

***************

Bogus: PROC [ti: TokenIndex] = {ERROR Unintelligible[input.tokens[ti].offset]};

OutOfRange: ERROR [pos: CharIndex] = CODE;

CollectValue: PROC [t: num Token, low, high: NAT] RETURNS [value: NAT ← 0] = {

FOR j: CharIndex IN [t.offset..t.offset+t.length) DO

We would like to catch a bounds fault here and turn it into OutOfRange. Unfortunately, the current implementation of catch phrases doesn't support this, since a catch phrase is only attached to procedure calls. Accordingly, we have to do the test by hand. The actual test is a bit too stringent, but it doesn't really matter.

IF value > NAT.LAST/10 - 1 THEN ERROR OutOfRange[j];

value ← value * 10 + (input.backing[j] - '0);

ENDLOOP;

IF ~(value IN [low..high]) THEN ERROR OutOfRange[t.offset+t.length];

};

LeapYear: PROC [year: Year] RETURNS [BOOL] = {

RETURN[year MOD 4 = 0 AND (year MOD 100 ~= 0 OR year MOD 400 = 0)]};

ConsumeSPOrComma: PROC [t: TokenIndex] RETURNS [TokenIndex] = {

WITH GetToken[t] SELECT FROM

s: sep Token => SELECT s.char FROM Ascii.SP, ', => RETURN[t.SUCC]; ENDCASE;

ENDCASE;

RETURN[t]

};

***************

* Main Body

***************

nextToken: TokenIndex ← TokenIndex.FIRST.SUCC;

InitializeInput[];

BEGIN

ENABLE Unintelligible => {

BackupStream[to: pos]; FinalizeInput[]; ERROR IO.Error[SyntaxError, s]};

BEGIN

nextToken ← ParseWeekday[nextToken];

nextToken ← ParseDate[nextToken ! Unintelligible => GO TO tryTimeFirst];

nextToken ← ParseTime[ConsumeSPOrComma[nextToken] ! Unintelligible => GO TO dateOnly];

nextToken ← ParseZone[nextToken];

EXITS

dateOnly => NULL;

tryTimeFirst => {

nextToken ← ParseTime[nextToken];

nextToken ← ParseZone[nextToken];

nextToken ← ParseDate[ConsumeSPOrComma[nextToken] ! Unintelligible => CONTINUE];

};

END;

BackupStream[to: input.tokens[nextToken].offset];

FinalizeInput[];

};

END.