DateAndTimeUnsafeImpl.mesa
last edited by Levin on November 1, 1982 12:01 pm
DIRECTORY
Ascii USING [SP, TAB],
DateAndTimeUnsafe USING [Notes],
Heap USING [systemZone],
LongString USING [EquivalentSubStrings, SubStringDescriptor, UpperCase],
System USING [
GetLocalTimeParameters, gmtEpoch, GreenwichMeanTime, LocalTimeParameters,
LocalTimeParametersUnknown];
DateAndTimeUnsafeImpl: PROGRAM
IMPORTS Heap, String: LongString, System
EXPORTS DateAndTimeUnsafe =
BEGIN
Unintelligible: PUBLIC ERROR [vicinity: NAT] = CODE;
hoursPerDay: INT = 24;
minutesPerHour: INT = 60;
secondsPerMinute: INT = 60;
secondsPerDay: INT = hoursPerDay*minutesPerHour*secondsPerMinute;
baseYear: NAT = 1968;
baseDay: NAT[0..7) = 1; -- January 1, 1968 was a Monday (0 = Sunday)
EpochDays: TYPE = INT[0..LAST[LONG CARDINAL]/secondsPerDay];
YearDays: TYPE = INT[0..366];
The following computation isn't exactly correct, since it doesn't account for the absence of leap days in century years not divisible by 400. As it happens, the actual values for baseYear and LAST[EpochDays] are such that there is only one such day, and it doesn't affect the arithmetic.
Year: TYPE = [baseYear..baseYear+(LAST[EpochDays]/(4*365+1))*4];
Month: TYPE = [1..12];
Day: TYPE = [1..31];
Hour: TYPE = [0..hoursPerDay);
Minute: TYPE = [0..minutesPerHour);
Second: TYPE = [0..secondsPerMinute);
DeltaMinutes: TYPE = INT --(-hoursPerDay*minutesPerHour..hoursPerDay*minutesPerHour)--;
ZoneIndex: TYPE = INT [-12..12];
NAZones: TYPE = ZoneIndex [-11..-4];
Parse: PUBLIC PROCEDURE [s: LONG STRING]
RETURNS [dt: System.GreenwichMeanTime, notes: DateAndTimeUnsafe.Notes, length: NAT] = {
maxReasonableTokens: NAT = 30;
TokenCount: TYPE = NAT[0..maxReasonableTokens];
TokenIndex: TYPE = NAT[FIRST[TokenCount]..LAST[TokenCount]);
CharIndex: TYPE = NAT;
CharCount: TYPE = NAT[0..37777B]; -- this upper bound saves a word in Token
Tokens: TYPE = RECORD [
nTokens: TokenCount ← 0,
tokens: SEQUENCE length: TokenCount OF Token ← NULL];
Token: TYPE = RECORD [
offset: CharIndex,
kind: SELECT type: * FROM
alpha => [length: CharCount],
num => [length: CharCount],
sep => [char: CHARACTER],
ENDCASE];
input: LONG POINTER TO Tokens ← Heap.systemZone.NEW[Tokens[maxReasonableTokens] ← []];
sentinel: CHARACTER = 200C;
Bogus: PROCEDURE [ti: TokenIndex] = {ERROR Unintelligible[input.tokens[ti].offset]};
AddToken: PROCEDURE [t: Token] = {
If there are more than maxReasonableTokens, each subsequent call to this procedure will overwrite the last token stored. Eventually, the scan will complete and the final token will be a sentinel.
IF input.nTokens = input.length THEN input.tokens[input.nTokens-1] ← t
ELSE {input.tokens[input.nTokens] ← t; input.nTokens ← input.nTokens + 1};
};
Tokenize: PROCEDURE = {
state: {initial, num, alpha} ← initial;
i: NAT ← 0;
tStart: NAT;
AddToken[[0, sep[sentinel]]];
DO
char: CHARACTER;
SELECT TRUE FROM
s = NIL, i = s.length => char ← sentinel;
s[i] = Ascii.TAB => char ← Ascii.SP;
s[i] IN [40C..176C] => char ← s[i];
ENDCASE => --illegal; terminate scan-- char ← sentinel;
SELECT char FROM
IN ['0..'9] =>
SELECT state FROM
initial => {state ← num; tStart ← i};
num => NULL;
alpha => {AddToken[[tStart, alpha[i-tStart]]]; state ← num; tStart ← i};
ENDCASE;
',, '., ':, '/, '-, '+, Ascii.SP, sentinel => {
SELECT state FROM
initial => NULL;
num => {AddToken[[tStart, num[i-tStart]]]; state ← initial};
alpha => {AddToken[[tStart, alpha[i-tStart]]]; state ← initial};
ENDCASE;
The effect of the following is to ignore spaces as separators unless there is nothing else, and to terminate the scan if two separator characters are adjacent.
WITH t: input.tokens[input.nTokens-1] SELECT FROM
sep =>
IF t.char = Ascii.SP THEN t.char ← char
ELSE IF char ~= Ascii.SP THEN --illegal-- t.char ← char ← sentinel;
ENDCASE => AddToken[[i, sep[char]]];
IF char = sentinel THEN EXIT};
ENDCASE =>
SELECT state FROM
initial => {state ← alpha; tStart ← i};
num => {AddToken[[tStart, num[i-tStart]]]; state ← alpha; tStart ← i};
alpha => NULL;
ENDCASE;
i ← i + 1;
ENDLOOP;
};
year: Year;
month: Month;
day: Day;
hour: Hour;
minute: Minute;
second: Second;
zoneAdjust: DeltaMinutes;
dst: BOOLEAN;
cumDays: ARRAY [FIRST[Month]-1..LAST[Month]] OF NAT = [
0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366];
OutOfRange: ERROR [errorPos: NAT] = CODE;
CollectValue: PROCEDURE [t: num Token, low, high: NAT] RETURNS [value: NAT ← 0] = {
FOR j: CharIndex IN [t.offset..t.offset+t.length) DO
We would like to catch a bounds fault here and turn it into OutOfRange. Unfortunately, the current implementation of catch phrases doesn't support this, since a catch phrase is only attached to procedure calls. Accordingly, we have to do the test by hand. The actual test is a bit too stringent, but it doesn't really matter.
IF value > LAST[NAT]/10 - 1 THEN ERROR OutOfRange[j];
value ← value * 10 + (s[j] - '0);
ENDLOOP;
IF ~(value IN [low..high]) THEN ERROR OutOfRange[t.offset+t.length];
};
ParseDate: PROCEDURE [first: TokenIndex] RETURNS [next: TokenIndex] = {
BogusMonth: ERROR = CODE;
ParseSymbolicMonth: PROCEDURE [t: alpha Token] RETURNS [month: Month] = {
MonthNames: TYPE = ARRAY Month OF STRING;
ss: String.SubStringDescriptor ← [base: s, offset: t.offset, length: t.length];
SELECT String.UpperCase[s[t.offset]] FROM
'I, 'V, 'X => {
roman: MonthNames ← [
"I"L, "II"L, "III"L, "IV"L, "V"L, "VI"L, "VII"L, "VIII"L, "IX"L, "X"L, "XI"L, "XII"L];
FOR month IN Month DO
mm: String.SubStringDescriptor ←
[base: roman[month], offset: 0, length: roman[month].length];
IF String.EquivalentSubStrings[@mm, @ss] THEN EXIT;
REPEAT
FINISHED => ERROR BogusMonth;
ENDLOOP
};
ENDCASE => {
english: MonthNames ← [
"January"L, "February"L, "March"L, "April"L, "May"L, "June"L,
"July"L, "August"L, "September"L, "October"L, "November"L, "December"L];
IF t.length < 3 THEN ERROR BogusMonth;
FOR month IN Month DO
mm: String.SubStringDescriptor ←
[base: english[month], offset: 0, length: t.length];
IF String.EquivalentSubStrings[@mm, @ss] THEN EXIT;
REPEAT
FINISHED => ERROR BogusMonth;
ENDLOOP
};
};
CollectYear: PROCEDURE [y: num Token] RETURNS [Year] = {
RETURN[1900+CollectValue[y, FIRST[Year]-1900, 99 ! OutOfRange => CONTINUE]];
RETURN[CollectValue[y, FIRST[Year], LAST[Year]]]
};
CollectMonth: PROCEDURE [m: num Token] RETURNS [Month] = INLINE
{RETURN[CollectValue[m, FIRST[Month], LAST[Month]]]};
CollectDay: PROCEDURE [d: num Token, m: Month] RETURNS [Day] = INLINE
{RETURN[CollectValue[d, FIRST[Day], cumDays[m]-cumDays[m-1]]]};
DO
ENABLE OutOfRange => ERROR Unintelligible[errorPos];
second, third: TokenIndex;
t: num Token;
GetThird: PROCEDURE = {
WITH token: input.tokens[third] SELECT FROM
num => t ← token;
ENDCASE => Bogus[third]
};
IF input.nTokens < first + 3 OR input.tokens[first].type = sep THEN Bogus[first];
Assert: 'first' is alpha or num and at least two tokens follow it.
IF input.tokens[second𡤏irst+1].type = sep THEN second ← second + 1;
Assert: 'second' is alpha or num.
Ergo, second+1 < input.nTokens since input.tokens[input.nTokens-1].type = sep
IF input.tokens[third←second+1].type = sep THEN {
IF third = input.nTokens-1 THEN Bogus[third];
third ← third + 1};
Assert: 'third' is alpha or num. If we are really processing a date, 'third' will have to be numeric to make sense. However, we might be processing something like Wednesday, 26 May 1981, in which there is a valid date, but 'first' isn't pointing to it yet (it's still on Wednesday). So, we don't check that 'third' is numeric until we are sure that 'first' is pointing to something reasonable.
WITH f: input.tokens[first] SELECT FROM
alpha => {
The first token is alpha. If it isn't a recognizable month, it might be some form of the day of the week. Whatever it is, we simply skip over it (and the separator following it, if any) and try again.
month ← ParseSymbolicMonth[f ! BogusMonth => {first ← second; LOOP}];
'first' corresponds to a valid month. We now require that 'second' be the day of the month and 'third' be the year.
WITH d: input.tokens[second] SELECT FROM
num => day ← CollectDay[d, month];
ENDCASE => Bogus[second];
GetThird[];
year ← CollectYear[t]};
num => {
GetThird[];
WITH s: input.tokens[second] SELECT FROM
alpha => {
The second token is alpha, so we will require it to be the month.
month ← ParseSymbolicMonth[s ! BogusMonth => Bogus[second]];
Now we must decide if we have <day><month><year> or <year><month><day>
day ← CollectDay[f, month ! OutOfRange => GO TO yearFirst];
year ← CollectYear[t];
EXITS
yearFirst => {
year ← CollectYear[f];
day ← CollectDay[t, month]}};
num => {
All three parts are numeric.
year ← CollectYear[t ! OutOfRange => GO TO yearFirst];
month ← CollectMonth[f ! OutOfRange => GO TO dayFirst];
day ← CollectDay[s, month];
EXITS
yearFirst => {
year ← CollectYear[f];
month ← CollectMonth[s];
day ← CollectDay[t, month]};
dayFirst => {
month ← CollectMonth[s];
day ← CollectDay[f, month]}};
ENDCASE};
ENDCASE;
next ← third + 1;
EXIT
ENDLOOP;
Be sure we weren't slipped a bogus leap year...
IF month = 2 AND day = 29 AND ~LeapYear[year] THEN Bogus[next];
};
ParseTime: PROCEDURE [first: TokenIndex] RETURNS [next: TokenIndex] = {
ENABLE OutOfRange => ERROR Unintelligible[errorPos];
CollectHour: PROCEDURE [h: num Token] RETURNS [Hour] =
{RETURN[CollectValue[h, FIRST[Hour], LAST[Hour]]]};
CollectMinute: PROCEDURE [m: num Token] RETURNS [Minute] = INLINE
{RETURN[CollectValue[m, FIRST[Minute], LAST[Minute]]]};
CollectSecond: PROCEDURE [s: num Token] RETURNS [Second] = INLINE
{RETURN[CollectValue[s, FIRST[Second], LAST[Second]]]};
AMorPM: PROCEDURE [t: TokenIndex] RETURNS [BOOLEAN] = {
WITH m: input.tokens[t] SELECT FROM
alpha =>
IF m.length = 2 AND String.UpperCase[s[m.offset+1]] = 'M THEN {
offset: Hour ← 0;
SELECT String.UpperCase[s[m.offset]] FROM
'A => offset ← 0;
'P => offset ← 12;
ENDCASE => RETURN [FALSE];
IF ~(hour IN [1..12]) THEN Bogus[t];
hour ← (hour MOD 12) + offset;
RETURN [TRUE]};
ENDCASE;
RETURN [FALSE]
};
n: num Token;
next ← first + 1;
hour ← minute ← second ← 0;
WITH f: input.tokens[first] SELECT FROM
num => n ← f;
sep =>
IF f.char = sentinel THEN {notes ← noTime; RETURN}
ELSE Bogus[first];
ENDCASE => Bogus[first];
SELECT n.length FROM
1, 2, 4, 6 => NULL;
ENDCASE => Bogus[first];
hour ← CollectHour[[n.offset, num[MIN[n.length, 2]]]];
IF n.length <= 2 THEN {
A separator must follow a 1- or 2- digit hour field.
IF next + 1 >= input.nTokens OR input.tokens[next].type ~= sep THEN Bogus[next];
Assert: input.tokens[next+1] is alpha or num.
next ← next + 1;
WITH s: input.tokens[next] SELECT FROM
num => n ← s;
ENDCASE => Bogus[next];
SELECT n.length FROM
2, 4 => next ← next + 1;
ENDCASE => Bogus[next]}
ELSE {n.offset ← n.offset + 2; n.length ← n.length - 2};
minute ← CollectMinute[[n.offset, num[2]]];
Assert: 'next' indexes the token following the minutes.
IF n.length > 2 THEN
The minutes and seconds are concatenated.
second ← CollectSecond[[n.offset+2, num[2]]]
ELSE
Now look for optional seconds field. We assume it is present if there is a colon separator following the minutes.
WITH sep: input.tokens[next] SELECT FROM
sep =>
IF sep.char = ': THEN
Assert: input.tokens[next+1] is alpha or num.
WITH s: input.tokens[next+1] SELECT FROM
num => {second ← CollectSecond[s]; next ← next + 2};
ENDCASE => Bogus[next+1];
ENDCASE;
Assert: 'next' indexes the token following the last time part (minutes or seconds).
WITH s: input.tokens[next] SELECT FROM
sep => IF s.char ~= sentinel AND AMorPM[next+1] THEN next ← next + 2;
alpha => IF AMorPM[next] THEN next ← next + 1;
ENDCASE;
};
ParseZone: PROCEDURE [first: TokenIndex] RETURNS [next: TokenIndex] = {
ENABLE OutOfRange => ERROR Unintelligible[errorPos];
BadZone: ERROR = CODE;
CollectSymbolic: PROCEDURE [z: alpha Token] = {
char: CHARACTER ← s[z.offset];
SELECT z.length FROM
1 => {
zones: PACKED ARRAY ZoneIndex OF CHARACTER = [
'M, 'L, 'K, 'I, 'H, 'G, 'F, 'E, 'D, 'C, 'B, 'A,
'Z,
'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y];
FOR hour: ZoneIndex IN ZoneIndex DO
IF char = zones[hour] THEN
{zoneAdjust ← hour*minutesPerHour; EXIT};
REPEAT
FINISHED => GO TO badZone;
ENDLOOP};
3 => {
naZones: PACKED ARRAY NAZones OF CHARACTER = [
'B, 'H, 'Y, 'P, 'M, 'C, 'E, 'A];
IF String.UpperCase[s[z.offset+2]] ~= 'T THEN GO TO badZone;
SELECT String.UpperCase[s[z.offset+1]] FROM
'S, 'M => NULL; -- treat "mean" same as "standard"
'D => dst ← TRUE;
ENDCASE => GO TO badZone;
SELECT char FROM
'G => IF dst THEN GO TO badZone;
'N =>
IF dst THEN GO TO badZone
ELSE zoneAdjust ← -(3*minutesPerHour+30);
ENDCASE =>
FOR hour: NAZones IN NAZones DO
IF char = naZones[hour] THEN
{zoneAdjust ← hour*minutesPerHour; EXIT};
REPEAT
FINISHED => GO TO badZone;
ENDLOOP};
ENDCASE => GO TO badZone;
EXITS
badZone => ERROR BadZone;
};
CollectAbsolute: PROCEDURE [z: num Token] RETURNS [DeltaMinutes] = {
hour: Hour;
hourLength: NAT;
minute: Minute ← 0;
IF ~(z.length IN [1..4]) THEN ERROR OutOfRange[z.offset+z.length];
hourLength ← 2 - (z.length MOD 2);
hour ←
CollectValue[[z.offset, num[hourLength]], FIRST[Hour], LAST[Hour]];
IF z.length > 2 THEN
minute ←
CollectValue[[z.offset+hourLength, num[2]], FIRST[Minute], LAST[Minute]];
RETURN [hour*minutesPerHour+minute]
};
zoneAdjust ← 0; dst ← FALSE;
next ← first;
IF notes = noTime THEN RETURN;
WITH z: input.tokens[next] SELECT FROM
sep => {
zone: Token;
SELECT z.char FROM
Ascii.SP, '+, '- => zone ← input.tokens[next + 1];
ENDCASE => GO TO noZone; -- includes sentinel
WITH zone: zone SELECT FROM
num =>
SELECT z.char FROM
Ascii.SP => GO TO noZone;
'+ => zoneAdjust ← CollectAbsolute[zone];
'- => zoneAdjust ← -CollectAbsolute[zone];
ENDCASE; -- can't happen
alpha =>
IF z.char = '+ THEN Bogus[next]
ELSE
CollectSymbolic[zone ! BadZone =>
IF z.char = Ascii.SP THEN GO TO noZone ELSE Bogus[next+1]];
ENDCASE; -- can't happen
next ← next + 1};
alpha => CollectSymbolic[z ! BadZone => GO TO noZone];
ENDCASE => GO TO noZone;
EXITS
noZone => notes ← noZone;
};
LeapYear: PROCEDURE [year: Year] RETURNS [BOOLEAN] = {
RETURN[year MOD 4 = 0 AND (year MOD 100 ~= 0 OR year MOD 400 = 0)]};
ConsumeSpace: PROCEDURE [t: TokenIndex] RETURNS [TokenIndex] = {
WITH s: input.tokens[t] SELECT FROM
sep => SELECT s.char FROM Ascii.SP, ', => RETURN[t+1]; ENDCASE;
ENDCASE;
RETURN[t]
};
AssembleGMT: PROCEDURE RETURNS [System.GreenwichMeanTime] = {
DetermineZoneCorrection: PROCEDURE = {
We need to compute the local time parameters in effect at this zone on the date in question. Note that this is an imperfect simulation, since it assumes uniform dates and times for start and end of daylight savings time within the local zone.
FirstSundayAfter: PROCEDURE [ed: EpochDays] RETURNS [EpochDays] =
{RETURN [ed+7-((ed+baseDay) MOD 7)]};
ltp: System.LocalTimeParameters ←
[direction: west, zone: 8, zoneMinutes: 0, beginDST: , endDST: ];
ltp ← System.GetLocalTimeParameters[
! System.LocalTimeParametersUnknown => {
notes ← IF notes = noZone THEN zoneGuessed ELSE timeAndZoneGuessed;
CONTINUE}];
ltp.beginDST ← FirstSundayAfter[
epochToJan1 + --Apr23-- (112+(IF LeapYear[year] THEN 1 ELSE 0))] - epochToJan1;
ltp.endDST ← FirstSundayAfter[
epochToJan1 + --Oct24-- (296+(IF LeapYear[year] THEN 1 ELSE 0))] - epochToJan1;
zoneAdjust ← ltp.zone * minutesPerHour + ltp.zoneMinutes;
IF ltp.direction = west THEN zoneAdjust ← - zoneAdjust;
dst ← SELECT jan1ToDate FROM
IN (ltp.beginDST..ltp.endDST) => TRUE,
= ltp.beginDST => (hour >= 2),
= ltp.endDST => (hour < 2),
ENDCASE => FALSE;
};
YearBoundaries: PROCEDURE [year: Year, span: NAT] RETURNS [NAT] = INLINE
The following expression computes the number of years y in [FIRST[Year]..year) for which y MOD span is 0.
{RETURN[(year+span-1)/span - (FIRST[Year]+span-1)/span]};
epochToJan1: EpochDays ← (year - FIRST[Year])*365 +
YearBoundaries[year, 4] - YearBoundaries[year, 100] + YearBoundaries[year, 400];
jan1ToDate: YearDays ←
cumDays[month-1] + (day-1) - (IF ~LeapYear[year] AND month > 2 THEN 1 ELSE 0);
IF notes ~= normal THEN DetermineZoneCorrection[];
The following gross arithmetic is required to avoid overflows.
RETURN [System.GreenwichMeanTime[
((epochToJan1+jan1ToDate)+(System.gmtEpoch/secondsPerDay))*secondsPerDay +
((INT[hour]-
(IF dst THEN 1 ELSE 0))*minutesPerHour+minute-zoneAdjust)*secondsPerMinute+
second]];
};
*** Main Body of Parse ***
nextToken: TokenIndex ← SUCC[FIRST[TokenIndex]];
notes ← normal;
BEGIN
ENABLE Unintelligible => Heap.systemZone.FREE[@input];
Tokenize[];
Assert: The input has been tokenized such that:
input.tokens[0].kind = input.tokens[input.nTokens-1].kind = sep[sentinel]]
for all i IN (0..input.nTokens-1):
if input.tokens[i].type = sep, then
input.tokens[i-1].type ~= sep and input.tokens[i+1].type ~= sep
nextToken ← ParseDate[nextToken ! Unintelligible => GO TO tryTimeFirst];
nextToken ← ConsumeSpace[nextToken];
nextToken ← ParseTime[nextToken];
nextToken ← ParseZone[nextToken];
EXITS
tryTimeFirst => {
nextToken ← ParseTime[nextToken];
nextToken ← ParseZone[nextToken];
nextToken ← ConsumeSpace[nextToken];
nextToken ← ParseDate[nextToken];
};
END;
dt ← AssembleGMT[];
length ← input.tokens[nextToken].offset;
Heap.systemZone.FREE[@input];
};
END.