IODateAndTimeImpl.mesa
Last edited by:
Levin on November 16, 1983 11:54 am
MBrown on September 20, 1983 10:39 pm
-- Imported interfaces: character classification, calendar/time representation,
-- stream operations, scratch REF TEXT management, and ropes.
DIRECTORY
Ascii USING [SP, TAB, Upper],
BasicTime USING [
DayOfWeek, hoursPerDay, minutesPerHour, MonthOfYear, secondsPerMinute, Unpacked],
IO USING [Backup, EndOf, Error, GetChar, STREAM],
RefText USING [AppendChar, ObtainScratch, ReleaseScratch, TrustTextAsRope],
Rope USING [Equal, ROPE, Substr];
IODateAndTimeImpl: CEDAR PROGRAM
IMPORTS Ascii, IO, RefText, Rope
EXPORTS IO
= BEGIN
-- Calendar constants, taken from BasicTime.
hoursPerDay: NAT = BasicTime.hoursPerDay;
minutesPerHour: NAT = BasicTime.minutesPerHour;
secondsPerMinute: NAT = BasicTime.secondsPerMinute;
secondsPerDay: INT = hoursPerDay.LONG*minutesPerHour*secondsPerMinute;
baseYear: NAT = 1968;
-- Days since baseYear representable in a LONG CARDINAL count of seconds.
EpochDays: TYPE = INT[0..LAST[LONG CARDINAL]/secondsPerDay];
The following computation isn't exactly correct, since it doesn't account for the absence of leap days in century years not divisible by 400. As it happens, the actual values for baseYear and LAST[EpochDays] are such that there is at most one such day, and it doesn't affect the arithmetic.
Year: TYPE = NAT[baseYear..MIN[baseYear+(LAST[EpochDays]/(4*365+1))*4, 2036--BasicTime's limit--]];
Month: TYPE = NAT[1..12];
MonthOfYear: TYPE = BasicTime.MonthOfYear[January..December];
Day: TYPE = NAT[1..31];
Hour: TYPE = NAT[0..hoursPerDay);
Minute: TYPE = NAT[0..minutesPerHour);
Second: TYPE = NAT[0..secondsPerMinute);
DeltaMinutes: TYPE = INTEGER[-hoursPerDay*minutesPerHour..hoursPerDay*minutesPerHour];
defined as GMT minus local mean time
-- Hour offsets for the single-letter (military) zones; see 'zones' in ParseZone.
ZoneIndex: TYPE = INT [-12..12];
-- The subrange used by the three-letter North American zone names; see 'naZones' in ParseZone.
NAZones: TYPE = ZoneIndex[4..11];
ROPE: TYPE = Rope.ROPE;
-- Initial sizes for the scanner's token sequence and backing text.
initialTokens: NAT = 20;
initialBackingChars: NAT = 100;
TokenCount: TYPE = NAT;
TokenIndex: TYPE = NAT[TokenCount.FIRST..TokenCount.LAST);
CharIndex: TYPE = NAT;
CharCount: TYPE = NAT[0..37777B]; -- this upper bound saves a word in Token
-- The scanner's output: a growable sequence of tokens plus the backing text they index.
Input: TYPE = RECORD [
nTokens: TokenCount,
scratch: REF TEXT,
backing: REF TEXT,
tokens: SEQUENCE length: TokenCount OF Token
];
-- A token is an alphabetic run, a digit run, or a single separator character,
-- located by its offset in the backing text.
Token: TYPE = RECORD [
offset: CharIndex,
kind: SELECT type: * FROM
alpha => [length: CharCount],
num => [length: CharCount],
sep => [char: CHAR],
ENDCASE
];
sentinel: CHAR = 200C; -- marks end of interesting input in the backing text
GetUnpackedTime: PUBLIC PROC [s: IO.STREAM] RETURNS [time: BasicTime.Unpacked ← []] = {
-- Parses a date and time (optional weekday, optional time, optional zone)
-- from stream 's'. On a syntax error, every consumed character is backed up
-- onto 's' and IO.Error[SyntaxError, s] is raised.
Unintelligible: ERROR [pos: NAT] = CODE; -- internal; 'pos' is a backing-text character index
input: REF Input; -- tokens scanned so far, plus the backing characters read from 's'
***************
*
* Scanner
*
***************
The tokens built up in the input sequence (input.tokens) satisfy the following properties:
input.tokens[0] is a sentinel separator
two separators never appear consecutively
a sentinel separator delimits the end of interesting input
InitializeInput: PROC = {
-- Allocates the token sequence and backing text; installs the sentinel
-- character and the initial sentinel separator token (see properties above).
input ← NEW[Input[initialTokens]];
input.scratch ← RefText.ObtainScratch[initialBackingChars];
input.backing ← RefText.AppendChar[input.scratch, sentinel];
input.nTokens ← 0;
AddToken[[0, sep[sentinel]]];
};
FinalizeInput: PROC = {RefText.ReleaseScratch[input.scratch]}; -- return the scratch text
state: {initial, num, alpha, eof} ← $initial; -- scanner state, maintained by ReadMoreTokens
GetToken: PROC [ti: TokenIndex] RETURNS [Token] = {
-- Returns the token at index 'ti', running the scanner as needed. Once the
-- scanner reaches eof, the last token (a sentinel separator) is returned
-- for any index at or beyond the end of the sequence.
UNTIL input.nTokens > ti DO
IF state = $eof THEN RETURN [input.tokens[input.nTokens-1]];
ReadMoreTokens[];
ENDLOOP;
RETURN[input.tokens[ti]]
};
ReadMoreTokens: PROC = {
-- Scans characters from 's', appending tokens to input.tokens until at least
-- one token boundary has been emitted. Tabs are treated as spaces; any
-- character outside [40C..176C] terminates the scan (state becomes $eof).
i: NAT; -- backing-text index of the character most recently read
GetChar: PROC RETURNS [char: CHAR] = {
-- Reads the next character ('sentinel' once the stream is exhausted),
-- appends it to the backing text, and records its index in 'i'.
char ← IF s.EndOf[] THEN sentinel ELSE s.GetChar[];
i ← input.backing.length;
input.backing ← RefText.AppendChar[input.backing, char];
};
Backup: PROC = {
-- Undoes the previous GetChar: removes the character from the backing
-- text and pushes it back onto the stream.
s.Backup[input.backing[i ← input.backing.length.PRED]];
input.backing.length ← i;
};
tStart: NAT; -- backing-text index where the current alpha/num token began
IF state ~= $initial THEN ERROR;
DO
char: CHAR ← GetChar[];
SELECT char FROM
Ascii.TAB => char ← Ascii.SP;
IN [40C..176C] => NULL;
ENDCASE => --illegal; terminate scan-- char ← sentinel;
SELECT char FROM
IN ['0..'9] =>
SELECT state FROM
$initial => {state ← $num; tStart ← i};
$num => NULL;
$alpha => {AddToken[[tStart, alpha[i-tStart]]]; Backup[]; EXIT};
ENDCASE;
',, '., ':, '/, '-, '+, Ascii.SP, sentinel =>
SELECT state FROM
$initial =>
WITH t: input.tokens[input.nTokens-1] SELECT FROM
sep => {
The effect of the following is to ignore spaces as separators unless there is nothing else, and to terminate the scan if two separator characters are adjacent.
IF t.char = Ascii.SP THEN t.char ← char
ELSE IF char ~= Ascii.SP THEN --illegal-- t.char ← char ← sentinel;
IF char = sentinel THEN {state ← $eof; EXIT};
};
ENDCASE => {
AddToken[[i, sep[char]]];
IF char = sentinel THEN state ← $eof;
EXIT
};
$num => {AddToken[[tStart, num[i-tStart]]]; AddToken[[i, sep[char]]]; EXIT};
$alpha => {AddToken[[tStart, alpha[i-tStart]]]; AddToken[[i, sep[char]]]; EXIT};
ENDCASE;
ENDCASE =>
SELECT state FROM
$initial => {state ← $alpha; tStart ← i};
$num => {AddToken[[tStart, num[i-tStart]]]; Backup[]; EXIT};
$alpha => NULL;
ENDCASE;
ENDLOOP;
IF state ~= $eof THEN state ← $initial;
};
AddToken: PROC [t: Token] = {
-- Appends token 't' to input.tokens, growing the sequence by 3/2 when full.
The TRUSTED statements in this procedure are actually SAFE, but the compiler is too conservative in its treatment of variant records. Our Tokens contain no REFs, so we should be able to manipulate them freely without compromising the integrity of storage. Unfortunately, the compiler doesn't understand that.
IF input.nTokens = input.length THEN {
-- The sequence is full; allocate a larger one and copy the existing state.
newInput: REF Input ← NEW[Input[(input.length*3)/2]];
newInput.nTokens ← input.nTokens;
newInput.scratch ← input.scratch; -- FIX: was not copied, so FinalizeInput would release an uninitialized field and leak the scratch text
newInput.backing ← input.backing;
FOR i: NAT IN [0..input.nTokens) DO
TRUSTED{newInput.tokens[i] ← input.tokens[i]};
ENDLOOP;
input ← newInput;
};
-- FIX: store 't' unconditionally. Previously the store was in an ELSE arm,
-- so whenever the sequence was grown the token being added was silently
-- dropped and nTokens was not incremented.
TRUSTED{input.tokens[input.nTokens] ← t}; input.nTokens ← input.nTokens.SUCC;
};
BackupStream: PROC [to: CharIndex] = {
-- Pushes back onto 's', in reverse order, every character consumed at or
-- beyond backing index 'to'. Sentinels were never read from the stream
-- (GetChar fabricates them at end of stream), so they are skipped.
FOR i: CharIndex DECREASING IN [to..input.backing.length) DO
char: CHAR = input.backing[i];
IF char ~= sentinel THEN s.Backup[char];
ENDLOOP;
};
***************
*
* Weekday Parser
*
***************
ParseWeekday: PROC [first: TokenIndex] RETURNS [next: TokenIndex] = {
-- Consumes an optional leading weekday name — any case-insensitive prefix,
-- at least two characters long, of Monday..Sunday — plus one following
-- space or comma. Returns the index of the first unconsumed token; if no
-- weekday is recognized, 'first' is returned unchanged.
next ← first;
WITH GetToken[first] SELECT FROM
t: alpha Token => {
DayOfWeek: TYPE = BasicTime.DayOfWeek[Monday..Sunday];
weekdays: ARRAY DayOfWeek OF ROPE = [
"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"];
a: ROPE =
Rope.Substr[base: RefText.TrustTextAsRope[input.backing], start: t.offset, len: t.length];
IF t.length >= 2 THEN
FOR i: DayOfWeek IN DayOfWeek DO
IF a.Equal[s2: weekdays[i].Substr[len: t.length], case: FALSE] THEN
RETURN[ConsumeSPOrComma[first.SUCC]];
ENDLOOP;
};
ENDCASE;
};
***************
*
* Date Parser
*
***************
ParseDate: PROC [first: TokenIndex] RETURNS [next: TokenIndex] = {
-- Parses a date from the three non-separator tokens beginning at 'first',
-- setting time.year, time.month, and time.day. Accepts a symbolic month in
-- first, second, or third position, and all-numeric dates in
-- <year><month><day>, <month><day><year>, or <day>-first orderings.
ENABLE OutOfRange => ERROR Unintelligible[pos];
BogusMonth: ERROR = CODE;
ParseSymbolicMonth: PROC [t: alpha Token] RETURNS [month: MonthOfYear] = {
-- Recognizes a month given as a roman numeral I..XII, or as an English
-- month name abbreviated to three or more characters, case-insensitively.
MonthNames: TYPE = ARRAY MonthOfYear OF ROPE;
m: ROPE =
Rope.Substr[base: RefText.TrustTextAsRope[input.backing], start: t.offset, len: t.length];
SELECT Ascii.Upper[input.backing[t.offset]] FROM
'I, 'V, 'X => {
roman: MonthNames ← [
"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X", "XI", "XII"];
FOR month IN MonthOfYear DO
IF m.Equal[s2: roman[month], case: FALSE] THEN RETURN;
ENDLOOP;
};
ENDCASE => {
english: MonthNames ← [
"January", "February", "March", "April", "May", "June",
"July", "August", "September", "October", "November", "December"];
IF t.length >= 3 THEN
FOR month IN MonthOfYear DO
IF m.Equal[s2: english[month].Substr[len: t.length], case: FALSE] THEN RETURN;
ENDLOOP;
};
ERROR BogusMonth
};
CollectYear: PROC [y: num Token] RETURNS [Year] = {
This assumes Year.FIRST is in the 20th century
-- A value in [Year.FIRST-1900..99] is taken as a two-digit 19xx year;
-- otherwise the token must spell out a full year in [Year.FIRST..Year.LAST].
RETURN[1900+CollectValue[y, Year.FIRST-1900, 99 ! OutOfRange => CONTINUE]];
RETURN[CollectValue[y, Year.FIRST, Year.LAST]]
};
CollectMonth: PROC [m: num Token] RETURNS [MonthOfYear] = INLINE
{RETURN[VAL[CollectValue[m, Month.FIRST, Month.LAST]-Month.FIRST]]};
daysForMonth: ARRAY MonthOfYear OF NAT = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
CollectDay: PROC [d: num Token, m: MonthOfYear] RETURNS [Day] = INLINE
{RETURN[CollectValue[d, Day.FIRST, daysForMonth[m]]]};
second, third: TokenIndex;
t: num Token;
IF GetToken[first].type = sep THEN Bogus[first];
Assert: 'first' is alpha or num.
-- FIX: the next line previously read "GetToken[second𡤏irst.SUCC]" — a
-- mis-encoding of "second ← first.SUCC" (compare the assignment to 'third'
-- below). Restored the assignment.
WITH GetToken[second ← first.SUCC] SELECT FROM
sep: sep Token => IF sep.char = sentinel THEN Bogus[second] ELSE second ← second.SUCC;
ENDCASE;
Assert: 'second' is alpha or num. (Reasoning: if first.SUCC was a separator other than the final sentinel, then it must be followed by at least one non-separator token, since two separators cannot be adjacent, and the token sequence always ends with a sentinel, which is a separator.)
WITH GetToken[third←second.SUCC] SELECT FROM
sep: sep Token => IF sep.char = sentinel THEN Bogus[third] ELSE third ← third.SUCC;
ENDCASE;
Assert: 'third' is alpha or num. (Reasoning is similar to that for 'second'.) In all legal cases, 'third' must be numeric, so we check for that now.
WITH GetToken[third] SELECT FROM
y: num Token => t ← y;
ENDCASE => Bogus[third];
WITH GetToken[first] SELECT FROM
m: alpha Token => {
time.year ← CollectYear[t];
time.month ← ParseSymbolicMonth[m ! BogusMonth => Bogus[first]];
'first' corresponds to a valid month. We now require that 'second' be the day of the month and 'third' be the year.
WITH GetToken[second] SELECT FROM
d: num Token => time.day ← CollectDay[d, time.month];
ENDCASE => Bogus[second];
};
f: num Token =>
WITH GetToken[second] SELECT FROM
m: alpha Token => {
The second token is alpha, so we will require it to be the month.
time.month ← ParseSymbolicMonth[m ! BogusMonth => Bogus[second]];
Now we must decide if we have <day><month><year> or <year><month><day>.
time.day ← CollectDay[f, time.month ! OutOfRange => GO TO yearFirst];
time.year ← CollectYear[t];
EXITS
yearFirst => {time.year ← CollectYear[f]; time.day ← CollectDay[t, time.month]};
};
s: num Token => {
All three parts are numeric.
time.year ← CollectYear[t ! OutOfRange => GO TO yearFirst];
time.month ← CollectMonth[f ! OutOfRange => GO TO dayFirst];
time.day ← CollectDay[s, time.month];
EXITS
dayFirst => {
time.month ← CollectMonth[s];
time.day ← CollectDay[f, time.month];
};
yearFirst => {
time.year ← CollectYear[f];
time.month ← CollectMonth[s];
time.day ← CollectDay[t, time.month];
};
};
ENDCASE;
ENDCASE;
next ← third.SUCC;
Be sure we weren't slipped a bogus leap year...
IF time.month = February AND time.day = 29 AND ~LeapYear[time.year] THEN Bogus[next];
};
***************
*
* Time Parser
*
***************
ParseTime: PROC [first: TokenIndex] RETURNS [next: TokenIndex] = {
-- Parses an optional time of day beginning at token 'first', setting
-- time.hour, time.minute, and time.second (all default to 0). The hour
-- group may be 1, 2, 4, or 6 digits (HHMM and HHMMSS concatenated forms);
-- separated minutes and an optional ":"-prefixed seconds field follow a
-- 1- or 2-digit hour. An optional AM/PM suffix adjusts the hour.
ENABLE OutOfRange => ERROR Unintelligible[pos];
CollectHour: PROC [h: num Token] RETURNS [Hour] = INLINE
{RETURN[CollectValue[h, Hour.FIRST, Hour.LAST]]};
CollectMinute: PROC [m: num Token] RETURNS [Minute] = INLINE
{RETURN[CollectValue[m, Minute.FIRST, Minute.LAST]]};
CollectSecond: PROC [s: num Token] RETURNS [Second] = INLINE
{RETURN[CollectValue[s, Second.FIRST, Second.LAST]]};
AMorPM: PROC [t: TokenIndex] RETURNS [BOOL] = {
-- If token 't' is "AM" or "PM" (any case), converts time.hour — which must
-- already be in [1..12] — to 24-hour form and returns TRUE; otherwise FALSE.
WITH GetToken[t] SELECT FROM
m: alpha Token =>
IF m.length = 2 AND Ascii.Upper[input.backing[m.offset.SUCC]] = 'M THEN {
offset: Hour;
SELECT Ascii.Upper[input.backing[m.offset]] FROM
'A => offset ← 0;
'P => offset ← 12;
ENDCASE => RETURN[FALSE];
IF ~(time.hour IN [1..12]) THEN Bogus[t];
time.hour ← (time.hour MOD 12) + offset;
RETURN[TRUE]
};
ENDCASE;
RETURN[FALSE]
};
n: num Token;
next ← first;
time.hour ← time.minute ← time.second ← 0;
WITH GetToken[first] SELECT FROM
f: num Token => n ← f;
f: sep Token => IF f.char = sentinel THEN --time omitted-- RETURN ELSE Bogus[first];
ENDCASE => Bogus[first];
next ← next.SUCC;
-- The leading digit group must be H, HH, HHMM, or HHMMSS.
SELECT n.length FROM
1, 2, 4, 6 => NULL;
ENDCASE => Bogus[first];
time.hour ← CollectHour[[n.offset, num[MIN[n.length, 2]]]];
IF n.length <= 2 THEN {
A separator must follow a 1- or 2- digit hour field.
WITH GetToken[next] SELECT FROM
sep: sep Token => IF sep.char = sentinel THEN Bogus[next];
ENDCASE => Bogus[next];
Assert: 'next' is a non-sentinel separator. Therefore, 'next.SUCC' is alpha or num.
next ← next.SUCC;
WITH GetToken[next] SELECT FROM
s: num Token => n ← s;
ENDCASE => Bogus[next];
SELECT n.length FROM
2, 4 => next ← next.SUCC; -- MM or concatenated MMSS
ENDCASE => Bogus[next];
}
ELSE {n.offset ← n.offset + 2; n.length ← n.length - 2}; -- skip past the hour digits
time.minute ← CollectMinute[[n.offset, num[2]]];
Assert: 'next' indexes the token following the minutes.
IF n.length > 2 THEN
The minutes and seconds are concatenated.
time.second ← CollectSecond[[n.offset+2, num[2]]]
ELSE
Now look for optional seconds field. We assume it is present if there is a colon separator following the minutes.
WITH GetToken[next] SELECT FROM
sep: sep Token =>
IF sep.char = ': THEN
Assert: 'next.SUCC' is alpha or num.
WITH GetToken[next.SUCC] SELECT FROM
s: num Token => {time.second ← CollectSecond[s]; next ← next + 2};
ENDCASE => Bogus[next.SUCC];
ENDCASE;
Assert: 'next' indexes the token following the last time part (minutes or seconds).
WITH GetToken[next] SELECT FROM
s: sep Token => IF s.char ~= sentinel AND AMorPM[next.SUCC] THEN next ← next + 2;
a: alpha Token => IF AMorPM[next] THEN next ← next.SUCC;
ENDCASE;
};
***************
*
* Zone Parser
*
***************
ParseZone: PROC [first: TokenIndex] RETURNS [next: TokenIndex] = {
-- Parses an optional time zone beginning at token 'first': a "+" or "-"
-- followed by a 1-4 digit offset, a single military zone letter, "UT", or
-- a three-letter name ending in T (e.g. "PST", "PDT", "GMT", "NST"); sets
-- time.zone (a DeltaMinutes) and time.dst. When no zone is recognized,
-- 'first' is returned unchanged.
ENABLE OutOfRange => ERROR Unintelligible[pos];
BadZone: ERROR = CODE;
dst: BOOL ← FALSE; -- FIX: restored the " ← " lost to character mangling ("BOOLFALSE")
CollectSymbolic: PROC [z: alpha Token] RETURNS [DeltaMinutes] = {
-- Maps a symbolic zone name to an offset in minutes, setting 'dst' as a
-- side effect for "xDT" forms; raises BadZone if the name is unrecognized.
char: CHAR = Ascii.Upper[input.backing[z.offset]];
SELECT z.length FROM
1 => {
-- Single-letter military zones: Y..M span ZoneIndex hours, with Z = 0.
zones: PACKED ARRAY ZoneIndex OF CHAR = [
'Y, 'X, 'W, 'V, 'U, 'T, 'S, 'R, 'Q, 'P, 'O, 'N,
'Z,
'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'K, 'L, 'M
];
FOR hour: ZoneIndex IN ZoneIndex DO
IF char = zones[hour] THEN RETURN[hour*minutesPerHour];
ENDLOOP;
};
2 => IF char = 'U AND Ascii.Upper[input.backing[z.offset+1]] = 'T THEN RETURN[0];
3 => {
naZones: PACKED ARRAY NAZones OF CHAR = ['A, 'E, 'C, 'M, 'P, 'Y, 'H, 'B];
IF Ascii.Upper[input.backing[z.offset+2]] = 'T THEN {
SELECT Ascii.Upper[input.backing[z.offset.SUCC]] FROM
'S, 'M => NULL; -- treat "mean" same as "standard"
'D => dst ← TRUE;
ENDCASE;
SELECT char FROM
'G => IF ~dst THEN RETURN[0]; -- GMT
'N => IF ~dst THEN RETURN[3*minutesPerHour+30]; -- Newfoundland: +3:30
ENDCASE =>
FOR hour: NAZones IN NAZones DO
IF char = naZones[hour] THEN RETURN[hour*minutesPerHour];
ENDLOOP;
};
};
ENDCASE;
ERROR BadZone
};
CollectAbsolute: PROC [z: num Token] RETURNS [DeltaMinutes] = {
-- Parses a 1-4 digit group as H, HH, HMM, or HHMM; returns total minutes.
hour: Hour;
hourLength: NAT;
minute: Minute ← 0;
IF ~(z.length IN [1..4]) THEN ERROR OutOfRange[z.offset+z.length];
hourLength ← 2 - (z.length MOD 2); -- odd lengths have a 1-digit hour field
hour ← CollectValue[[z.offset, num[hourLength]], Hour.FIRST, Hour.LAST];
IF z.length > 2 THEN
minute ← CollectValue[[z.offset+hourLength, num[2]], Minute.FIRST, Minute.LAST];
RETURN[hour*minutesPerHour+minute]
};
WITH GetToken[next ← first] SELECT FROM
z: sep Token =>
SELECT z.char FROM
Ascii.SP, '+, '- => {
WITH GetToken[next.SUCC] SELECT FROM
zone: num Token =>
SELECT z.char FROM
Ascii.SP => GO TO noZone; -- numeric must be preceded by + or -
'+ => time.zone ← CollectAbsolute[zone];
'- => time.zone ← -CollectAbsolute[zone];
ENDCASE; -- can't happen
zone: alpha Token =>
IF z.char = '+ THEN Bogus[next.SUCC]
ELSE
time.zone ← CollectSymbolic[zone ! BadZone =>
IF z.char = Ascii.SP THEN GO TO noZone ELSE Bogus[next.SUCC]];
ENDCASE; -- can't happen
next ← next.SUCC;
};
ENDCASE => GO TO noZone; -- includes sentinel
z: alpha Token => time.zone ← CollectSymbolic[z ! BadZone => GO TO noZone];
ENDCASE => GO TO noZone;
next ← next.SUCC;
time.dst ← IF dst THEN yes ELSE no;
EXITS
noZone => NULL;
};
***************
*
* Utilities
*
***************
Bogus: PROC [ti: TokenIndex] = {ERROR Unintelligible[input.tokens[ti].offset]}; -- reject the input at token 'ti'
OutOfRange: ERROR [pos: CharIndex] = CODE; -- numeric field invalid; 'pos' is a backing-text index
CollectValue: PROC [t: num Token, low, high: NAT] RETURNS [value: NAT ← 0] = {
-- Converts the digit characters of 't' to a value and checks that it lies
-- in [low..high]; raises OutOfRange (carrying the offending backing-text
-- position) on overflow or a value outside the range.
FOR j: CharIndex IN [t.offset..t.offset+t.length) DO
We would like to catch a bounds fault here and turn it into OutOfRange. Unfortunately, the current implementation of catch phrases doesn't support this, since a catch phrase is only attached to procedure calls. Accordingly, we have to do the test by hand. The actual test is a bit too stringent, but it doesn't really matter.
IF value > NAT.LAST/10 - 1 THEN ERROR OutOfRange[j];
value ← value * 10 + (input.backing[j] - '0);
ENDLOOP;
IF ~(value IN [low..high]) THEN ERROR OutOfRange[t.offset+t.length];
};
LeapYear: PROC [year: Year] RETURNS [BOOL] = {
-- Gregorian rule: every fourth year is a leap year, except century years
-- not divisible by 400.
IF year MOD 400 = 0 THEN RETURN[TRUE];
IF year MOD 100 = 0 THEN RETURN[FALSE];
RETURN[year MOD 4 = 0]
};
ConsumeSPOrComma: PROC [t: TokenIndex] RETURNS [TokenIndex] = {
-- Skips a single space or comma separator at token index 't', if present;
-- otherwise returns 't' unchanged.
WITH GetToken[t] SELECT FROM
p: sep Token => IF p.char = Ascii.SP OR p.char = ', THEN RETURN[t.SUCC];
ENDCASE;
RETURN[t]
};
***************
*
* Main Body
*
***************
nextToken: TokenIndex ← TokenIndex.FIRST.SUCC; -- token 0 is the sentinel separator; start past it
InitializeInput[];
BEGIN
ENABLE Unintelligible => {
-- On failure: push back every consumed character, release scratch storage,
-- and surface the problem as an IO syntax error.
BackupStream[to: pos]; FinalizeInput[]; ERROR IO.Error[SyntaxError, s]};
BEGIN
-- Try <weekday> <date> <time> <zone> first; if the date doesn't parse,
-- retry as <time> <zone> [<date>]. A date without a time is also accepted.
nextToken ← ParseWeekday[nextToken];
nextToken ← ParseDate[nextToken ! Unintelligible => GO TO tryTimeFirst];
nextToken ← ParseTime[ConsumeSPOrComma[nextToken] ! Unintelligible => GO TO dateOnly];
nextToken ← ParseZone[nextToken];
EXITS
dateOnly => NULL;
tryTimeFirst => {
nextToken ← ParseTime[nextToken];
nextToken ← ParseZone[nextToken];
nextToken ← ParseDate[ConsumeSPOrComma[nextToken] ! Unintelligible => CONTINUE];
};
END;
END;
-- Push back any lookahead characters beyond the last token actually consumed.
BackupStream[to: input.tokens[nextToken].offset];
FinalizeInput[];
};
END.