[Indigo]<Cedar>DateAndTime>DateAndTimeUnsafeImpl.mesa!2

DateAndTimeUnsafeImpl.mesa

last edited by Levin on November 1, 1982 12:01 pm

DIRECTORY

Ascii USING [SP, TAB],

DateAndTimeUnsafe USING [Notes],

Heap USING [systemZone],

LongString USING [EquivalentSubStrings, SubStringDescriptor, UpperCase],

System USING [

GetLocalTimeParameters, gmtEpoch, GreenwichMeanTime, LocalTimeParameters,

LocalTimeParametersUnknown];

DateAndTimeUnsafeImpl: PROGRAM

IMPORTS Heap, String: LongString, System

EXPORTS DateAndTimeUnsafe =

BEGIN

Unintelligible: PUBLIC ERROR [vicinity: NAT] = CODE;

hoursPerDay: INT = 24;

minutesPerHour: INT = 60;

secondsPerMinute: INT = 60;

secondsPerDay: INT = hoursPerDay*minutesPerHour*secondsPerMinute;

baseYear: NAT = 1968;

baseDay: NAT[0..7) = 1; -- January 1, 1968 was a Monday (0 = Sunday)

EpochDays: TYPE = INT[0..LAST[LONG CARDINAL]/secondsPerDay];

YearDays: TYPE = INT[0..366];

The following computation isn't exactly correct, since it doesn't account for the absence of leap days in century years not divisible by 400. As it happens, the actual values for baseYear and LAST[EpochDays] are such that there is only one such day, and it doesn't affect the arithmetic.

Year: TYPE = [baseYear..baseYear+(LAST[EpochDays]/(4*365+1))*4];

Month: TYPE = [1..12];

Day: TYPE = [1..31];

Hour: TYPE = [0..hoursPerDay);

Minute: TYPE = [0..minutesPerHour);

Second: TYPE = [0..secondsPerMinute);

DeltaMinutes: TYPE = INT --(-hoursPerDay*minutesPerHour..hoursPerDay*minutesPerHour)--;

ZoneIndex: TYPE = INT [-12..12];

NAZones: TYPE = ZoneIndex [-11..-4];

Parse: PUBLIC PROCEDURE [s: LONG STRING]

RETURNS [dt: System.GreenwichMeanTime, notes: DateAndTimeUnsafe.Notes, length: NAT] = {

maxReasonableTokens: NAT = 30;

TokenCount: TYPE = NAT[0..maxReasonableTokens];

TokenIndex: TYPE = NAT[FIRST[TokenCount]..LAST[TokenCount]);

CharIndex: TYPE = NAT;

CharCount: TYPE = NAT[0..37777B]; -- this upper bound saves a word in Token

Tokens: TYPE = RECORD [

nTokens: TokenCount ← 0,

tokens: SEQUENCE length: TokenCount OF Token ← NULL];

Token: TYPE = RECORD [

offset: CharIndex,

kind: SELECT type: * FROM

alpha => [length: CharCount],

num => [length: CharCount],

sep => [char: CHARACTER],

ENDCASE];

input: LONG POINTER TO Tokens ← Heap.systemZone.NEW[Tokens[maxReasonableTokens] ← []];

sentinel: CHARACTER = 200C;

Bogus: PROCEDURE [ti: TokenIndex] = {ERROR Unintelligible[input.tokens[ti].offset]};

AddToken: PROCEDURE [t: Token] = {

If there are more than maxReasonableTokens, each subsequent call to this procedure will overwrite the last token stored. Eventually, the scan will complete and the final token will be a sentinel.

IF input.nTokens = input.length THEN input.tokens[input.nTokens-1] ← t

ELSE {input.tokens[input.nTokens] ← t; input.nTokens ← input.nTokens + 1};

};

Tokenize: PROCEDURE = {

state: {initial, num, alpha} ← initial;

i: NAT ← 0;

tStart: NAT;

AddToken[[0, sep[sentinel]]];

char: CHARACTER;

SELECT TRUE FROM

s = NIL, i = s.length => char ← sentinel;

s[i] = Ascii.TAB => char ← Ascii.SP;

s[i] IN [40C..176C] => char ← s[i];

ENDCASE => --illegal; terminate scan-- char ← sentinel;

SELECT char FROM

IN ['0..'9] =>

SELECT state FROM

initial => {state ← num; tStart ← i};

num => NULL;

alpha => {AddToken[[tStart, alpha[i-tStart]]]; state ← num; tStart ← i};

ENDCASE;

',, '., ':, '/, '-, '+, Ascii.SP, sentinel => {

SELECT state FROM

initial => NULL;

num => {AddToken[[tStart, num[i-tStart]]]; state ← initial};

alpha => {AddToken[[tStart, alpha[i-tStart]]]; state ← initial};

ENDCASE;

The effect of the following is to ignore spaces as separators unless there is nothing else, and to terminate the scan if two separator characters are adjacent.

WITH t: input.tokens[input.nTokens-1] SELECT FROM

sep =>

IF t.char = Ascii.SP THEN t.char ← char

ELSE IF char ~= Ascii.SP THEN --illegal-- t.char ← char ← sentinel;

ENDCASE => AddToken[[i, sep[char]]];

IF char = sentinel THEN EXIT};

ENDCASE =>

SELECT state FROM

initial => {state ← alpha; tStart ← i};

num => {AddToken[[tStart, num[i-tStart]]]; state ← alpha; tStart ← i};

alpha => NULL;

ENDCASE;

i ← i + 1;

ENDLOOP;

};

year: Year;

month: Month;

day: Day;

hour: Hour;

minute: Minute;

second: Second;

zoneAdjust: DeltaMinutes;

dst: BOOLEAN;

cumDays: ARRAY [FIRST[Month]-1..LAST[Month]] OF NAT = [

0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366];

OutOfRange: ERROR [errorPos: NAT] = CODE;

CollectValue: PROCEDURE [t: num Token, low, high: NAT] RETURNS [value: NAT ← 0] = {

FOR j: CharIndex IN [t.offset..t.offset+t.length) DO

We would like to catch a bounds fault here and turn it into OutOfRange. Unfortunately, the current implementation of catch phrases doesn't support this, since a catch phrase is only attached to procedure calls. Accordingly, we have to do the test by hand. The actual test is a bit too stringent, but it doesn't really matter.

IF value > LAST[NAT]/10 - 1 THEN ERROR OutOfRange[j];

value ← value * 10 + (s[j] - '0);

ENDLOOP;

IF ~(value IN [low..high]) THEN ERROR OutOfRange[t.offset+t.length];

};

ParseDate: PROCEDURE [first: TokenIndex] RETURNS [next: TokenIndex] = {

BogusMonth: ERROR = CODE;

ParseSymbolicMonth: PROCEDURE [t: alpha Token] RETURNS [month: Month] = {

MonthNames: TYPE = ARRAY Month OF STRING;

ss: String.SubStringDescriptor ← [base: s, offset: t.offset, length: t.length];

SELECT String.UpperCase[s[t.offset]] FROM

'I, 'V, 'X => {

roman: MonthNames ← [

"I"L, "II"L, "III"L, "IV"L, "V"L, "VI"L, "VII"L, "VIII"L, "IX"L, "X"L, "XI"L, "XII"L];

FOR month IN Month DO

mm: String.SubStringDescriptor ←

[base: roman[month], offset: 0, length: roman[month].length];

IF String.EquivalentSubStrings[@mm, @ss] THEN EXIT;

REPEAT

FINISHED => ERROR BogusMonth;

ENDLOOP

};

ENDCASE => {

english: MonthNames ← [

"January"L, "February"L, "March"L, "April"L, "May"L, "June"L,

"July"L, "August"L, "September"L, "October"L, "November"L, "December"L];

IF t.length < 3 THEN ERROR BogusMonth;

FOR month IN Month DO

mm: String.SubStringDescriptor ←

[base: english[month], offset: 0, length: t.length];

IF String.EquivalentSubStrings[@mm, @ss] THEN EXIT;

REPEAT

FINISHED => ERROR BogusMonth;

ENDLOOP

};

CollectYear: PROCEDURE [y: num Token] RETURNS [Year] = {

RETURN[1900+CollectValue[y, FIRST[Year]-1900, 99 ! OutOfRange => CONTINUE]];

RETURN[CollectValue[y, FIRST[Year], LAST[Year]]]

};

CollectMonth: PROCEDURE [m: num Token] RETURNS [Month] = INLINE

{RETURN[CollectValue[m, FIRST[Month], LAST[Month]]]};

CollectDay: PROCEDURE [d: num Token, m: Month] RETURNS [Day] = INLINE

{RETURN[CollectValue[d, FIRST[Day], cumDays[m]-cumDays[m-1]]]};

ENABLE OutOfRange => ERROR Unintelligible[errorPos];

second, third: TokenIndex;

t: num Token;

GetThird: PROCEDURE = {

WITH token: input.tokens[third] SELECT FROM

num => t ← token;

ENDCASE => Bogus[third]

};

IF input.nTokens < first + 3 OR input.tokens[first].type = sep THEN Bogus[first];

Assert: 'first' is alpha or num and at least two tokens follow it.

IF input.tokens[second𡤏irst+1].type = sep THEN second ← second + 1;

Assert: 'second' is alpha or num.

Ergo, second+1 < input.nTokens since input.tokens[input.nTokens-1].type = sep

IF input.tokens[third←second+1].type = sep THEN {

IF third = input.nTokens-1 THEN Bogus[third];

third ← third + 1};

Assert: 'third' is alpha or num. If we are really processing a date, 'third' will have to be numeric to make sense. However, we might be processing something like Wednesday, 26 May 1981, in which there is a valid date, but 'first' isn't pointing to it yet (it's still on Wednesday). So, we don't check that 'third' is numeric until we are sure that 'first' is pointing to something reasonable.

WITH f: input.tokens[first] SELECT FROM

alpha => {

The first token is alpha. If it isn't a recognizable month, it might be some form of the day of the week. Whatever it is, we simply skip over it (and the separator following it, if any) and try again.

month ← ParseSymbolicMonth[f ! BogusMonth => {first ← second; LOOP}];

'first' corresponds to a valid month. We now require that 'second' be the day of the month and 'third' be the year.

WITH d: input.tokens[second] SELECT FROM

num => day ← CollectDay[d, month];

ENDCASE => Bogus[second];

GetThird[];

year ← CollectYear[t]};

num => {

GetThird[];

WITH s: input.tokens[second] SELECT FROM

alpha => {

The second token is alpha, so we will require it to be the month.

month ← ParseSymbolicMonth[s ! BogusMonth => Bogus[second]];

Now we must decide if we have <day><month><year> or <year><month><day>

day ← CollectDay[f, month ! OutOfRange => GO TO yearFirst];

year ← CollectYear[t];

EXITS

yearFirst => {

year ← CollectYear[f];

day ← CollectDay[t, month]}};

num => {

All three parts are numeric.

year ← CollectYear[t ! OutOfRange => GO TO yearFirst];

month ← CollectMonth[f ! OutOfRange => GO TO dayFirst];

day ← CollectDay[s, month];

EXITS

yearFirst => {

year ← CollectYear[f];

month ← CollectMonth[s];

day ← CollectDay[t, month]};

dayFirst => {

month ← CollectMonth[s];

day ← CollectDay[f, month]}};

ENDCASE};

ENDCASE;

next ← third + 1;

EXIT

ENDLOOP;

Be sure we weren't slipped a bogus leap year...

IF month = 2 AND day = 29 AND ~LeapYear[year] THEN Bogus[next];

};

ParseTime: PROCEDURE [first: TokenIndex] RETURNS [next: TokenIndex] = {

ENABLE OutOfRange => ERROR Unintelligible[errorPos];

CollectHour: PROCEDURE [h: num Token] RETURNS [Hour] =

{RETURN[CollectValue[h, FIRST[Hour], LAST[Hour]]]};

CollectMinute: PROCEDURE [m: num Token] RETURNS [Minute] = INLINE

{RETURN[CollectValue[m, FIRST[Minute], LAST[Minute]]]};

CollectSecond: PROCEDURE [s: num Token] RETURNS [Second] = INLINE

{RETURN[CollectValue[s, FIRST[Second], LAST[Second]]]};

AMorPM: PROCEDURE [t: TokenIndex] RETURNS [BOOLEAN] = {

WITH m: input.tokens[t] SELECT FROM

alpha =>

IF m.length = 2 AND String.UpperCase[s[m.offset+1]] = 'M THEN {

offset: Hour ← 0;

SELECT String.UpperCase[s[m.offset]] FROM

'A => offset ← 0;

'P => offset ← 12;

ENDCASE => RETURN [FALSE];

IF ~(hour IN [1..12]) THEN Bogus[t];

hour ← (hour MOD 12) + offset;

RETURN [TRUE]};

ENDCASE;

RETURN [FALSE]

};

n: num Token;

next ← first + 1;

hour ← minute ← second ← 0;

WITH f: input.tokens[first] SELECT FROM

num => n ← f;

sep =>

IF f.char = sentinel THEN {notes ← noTime; RETURN}

ELSE Bogus[first];

ENDCASE => Bogus[first];

SELECT n.length FROM

1, 2, 4, 6 => NULL;

ENDCASE => Bogus[first];

hour ← CollectHour[[n.offset, num[MIN[n.length, 2]]]];

IF n.length <= 2 THEN {

A separator must follow a 1- or 2- digit hour field.

IF next + 1 >= input.nTokens OR input.tokens[next].type ~= sep THEN Bogus[next];

Assert: input.tokens[next+1] is alpha or num.

next ← next + 1;

WITH s: input.tokens[next] SELECT FROM

num => n ← s;

ENDCASE => Bogus[next];

SELECT n.length FROM

2, 4 => next ← next + 1;

ENDCASE => Bogus[next]}

ELSE {n.offset ← n.offset + 2; n.length ← n.length - 2};

minute ← CollectMinute[[n.offset, num[2]]];

Assert: 'next' indexes the token following the minutes.

IF n.length > 2 THEN

The minutes and seconds are concatenated.

second ← CollectSecond[[n.offset+2, num[2]]]

ELSE

Now look for optional seconds field. We assume it is present if there is a colon separator following the minutes.

WITH sep: input.tokens[next] SELECT FROM

sep =>

IF sep.char = ': THEN

Assert: input.tokens[next+1] is alpha or num.

WITH s: input.tokens[next+1] SELECT FROM

num => {second ← CollectSecond[s]; next ← next + 2};

ENDCASE => Bogus[next+1];

ENDCASE;

Assert: 'next' indexes the token following the last time part (minutes or seconds).

WITH s: input.tokens[next] SELECT FROM

sep => IF s.char ~= sentinel AND AMorPM[next+1] THEN next ← next + 2;

alpha => IF AMorPM[next] THEN next ← next + 1;

ENDCASE;

};

ParseZone: PROCEDURE [first: TokenIndex] RETURNS [next: TokenIndex] = {

ENABLE OutOfRange => ERROR Unintelligible[errorPos];

BadZone: ERROR = CODE;

CollectSymbolic: PROCEDURE [z: alpha Token] = {

char: CHARACTER ← s[z.offset];

SELECT z.length FROM

1 => {

zones: PACKED ARRAY ZoneIndex OF CHARACTER = [

'M, 'L, 'K, 'I, 'H, 'G, 'F, 'E, 'D, 'C, 'B, 'A,

'Z,

'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y];

FOR hour: ZoneIndex IN ZoneIndex DO

IF char = zones[hour] THEN

{zoneAdjust ← hour*minutesPerHour; EXIT};

REPEAT

FINISHED => GO TO badZone;

ENDLOOP};

3 => {

naZones: PACKED ARRAY NAZones OF CHARACTER = [

'B, 'H, 'Y, 'P, 'M, 'C, 'E, 'A];

IF String.UpperCase[s[z.offset+2]] ~= 'T THEN GO TO badZone;

SELECT String.UpperCase[s[z.offset+1]] FROM

'S, 'M => NULL; -- treat "mean" same as "standard"

'D => dst ← TRUE;

ENDCASE => GO TO badZone;

SELECT char FROM

'G => IF dst THEN GO TO badZone;

'N =>

IF dst THEN GO TO badZone

ELSE zoneAdjust ← -(3*minutesPerHour+30);

ENDCASE =>

FOR hour: NAZones IN NAZones DO

IF char = naZones[hour] THEN

{zoneAdjust ← hour*minutesPerHour; EXIT};

REPEAT

FINISHED => GO TO badZone;

ENDLOOP};

ENDCASE => GO TO badZone;

EXITS

badZone => ERROR BadZone;

};

CollectAbsolute: PROCEDURE [z: num Token] RETURNS [DeltaMinutes] = {

hour: Hour;

hourLength: NAT;

minute: Minute ← 0;

IF ~(z.length IN [1..4]) THEN ERROR OutOfRange[z.offset+z.length];

hourLength ← 2 - (z.length MOD 2);

hour ←

CollectValue[[z.offset, num[hourLength]], FIRST[Hour], LAST[Hour]];

IF z.length > 2 THEN

minute ←

CollectValue[[z.offset+hourLength, num[2]], FIRST[Minute], LAST[Minute]];

RETURN [hour*minutesPerHour+minute]

};

zoneAdjust ← 0; dst ← FALSE;

next ← first;

IF notes = noTime THEN RETURN;

WITH z: input.tokens[next] SELECT FROM

sep => {

zone: Token;

SELECT z.char FROM

Ascii.SP, '+, '- => zone ← input.tokens[next + 1];

ENDCASE => GO TO noZone; -- includes sentinel

WITH zone: zone SELECT FROM

num =>

SELECT z.char FROM

Ascii.SP => GO TO noZone;

'+ => zoneAdjust ← CollectAbsolute[zone];

'- => zoneAdjust ← -CollectAbsolute[zone];

ENDCASE; -- can't happen

alpha =>

IF z.char = '+ THEN Bogus[next]

ELSE

CollectSymbolic[zone ! BadZone =>

IF z.char = Ascii.SP THEN GO TO noZone ELSE Bogus[next+1]];

ENDCASE; -- can't happen

next ← next + 1};

alpha => CollectSymbolic[z ! BadZone => GO TO noZone];

ENDCASE => GO TO noZone;

EXITS

noZone => notes ← noZone;

};

LeapYear: PROCEDURE [year: Year] RETURNS [BOOLEAN] = {

RETURN[year MOD 4 = 0 AND (year MOD 100 ~= 0 OR year MOD 400 = 0)]};

ConsumeSpace: PROCEDURE [t: TokenIndex] RETURNS [TokenIndex] = {

WITH s: input.tokens[t] SELECT FROM

sep => SELECT s.char FROM Ascii.SP, ', => RETURN[t+1]; ENDCASE;

ENDCASE;

RETURN[t]

};

AssembleGMT: PROCEDURE RETURNS [System.GreenwichMeanTime] = {

DetermineZoneCorrection: PROCEDURE = {

We need to compute the local time parameters in effect at this zone on the date in question. Note that this is an imperfect simulation, since it assumes uniform dates and times for start and end of daylight savings time within the local zone.

FirstSundayAfter: PROCEDURE [ed: EpochDays] RETURNS [EpochDays] =

{RETURN [ed+7-((ed+baseDay) MOD 7)]};

ltp: System.LocalTimeParameters ←

[direction: west, zone: 8, zoneMinutes: 0, beginDST: , endDST: ];

ltp ← System.GetLocalTimeParameters[

! System.LocalTimeParametersUnknown => {

notes ← IF notes = noZone THEN zoneGuessed ELSE timeAndZoneGuessed;

CONTINUE}];

ltp.beginDST ← FirstSundayAfter[

epochToJan1 + --Apr23-- (112+(IF LeapYear[year] THEN 1 ELSE 0))] - epochToJan1;

ltp.endDST ← FirstSundayAfter[

epochToJan1 + --Oct24-- (296+(IF LeapYear[year] THEN 1 ELSE 0))] - epochToJan1;

zoneAdjust ← ltp.zone * minutesPerHour + ltp.zoneMinutes;

IF ltp.direction = west THEN zoneAdjust ← - zoneAdjust;

dst ← SELECT jan1ToDate FROM

IN (ltp.beginDST..ltp.endDST) => TRUE,

= ltp.beginDST => (hour >= 2),

= ltp.endDST => (hour < 2),

ENDCASE => FALSE;

};

YearBoundaries: PROCEDURE [year: Year, span: NAT] RETURNS [NAT] = INLINE

The following expression computes the number of years y in [FIRST[Year]..year) for which y MOD span is 0.

{RETURN[(year+span-1)/span - (FIRST[Year]+span-1)/span]};

epochToJan1: EpochDays ← (year - FIRST[Year])*365 +

YearBoundaries[year, 4] - YearBoundaries[year, 100] + YearBoundaries[year, 400];

jan1ToDate: YearDays ←

cumDays[month-1] + (day-1) - (IF ~LeapYear[year] AND month > 2 THEN 1 ELSE 0);

IF notes ~= normal THEN DetermineZoneCorrection[];

The following gross arithmetic is required to avoid overflows.

RETURN [System.GreenwichMeanTime[

((epochToJan1+jan1ToDate)+(System.gmtEpoch/secondsPerDay))*secondsPerDay +

((INT[hour]-

(IF dst THEN 1 ELSE 0))*minutesPerHour+minute-zoneAdjust)*secondsPerMinute+

second]];

};

*** Main Body of Parse ***

nextToken: TokenIndex ← SUCC[FIRST[TokenIndex]];

notes ← normal;

BEGIN

ENABLE Unintelligible => Heap.systemZone.FREE[@input];

Tokenize[];

Assert: The input has been tokenized such that:

input.tokens[0].kind = input.tokens[input.nTokens-1].kind = sep[sentinel]]

for all i IN (0..input.nTokens-1):

if input.tokens[i].type = sep, then

input.tokens[i-1].type ~= sep and input.tokens[i+1].type ~= sep

nextToken ← ParseDate[nextToken ! Unintelligible => GO TO tryTimeFirst];

nextToken ← ConsumeSpace[nextToken];

nextToken ← ParseTime[nextToken];

nextToken ← ParseZone[nextToken];

EXITS

tryTimeFirst => {

nextToken ← ParseTime[nextToken];

nextToken ← ParseZone[nextToken];

nextToken ← ConsumeSpace[nextToken];

nextToken ← ParseDate[nextToken];

};

END;

dt ← AssembleGMT[];

length ← input.tokens[nextToken].offset;

Heap.systemZone.FREE[@input];

};

END.