DIRECTORY
Ascii USING [SP, TAB, Upper],
BasicTime
USING [
DayOfWeek, hoursPerDay, minutesPerHour, MonthOfYear, secondsPerMinute, Unpacked],
IO USING [Backup, EndOf, Error, GetChar, STREAM],
RefText USING [AppendChar, ObtainScratch, ReleaseScratch, TrustTextAsRope],
Rope USING [Equal, ROPE, Substr];
GetUnpackedTime:
PUBLIC
PROC [s:
IO.
STREAM]
RETURNS [time: BasicTime.Unpacked ← []] = {
Unintelligible: ERROR [pos: NAT] = CODE;
input: REF Input;
***************
*
* Scanner
*
***************
The tokens built up in the input sequence (input.tokens) satisfy the following properties:
input.tokens[0] is a sentinel separator
two separators never appear consecutively
a sentinel separator delimits the end of interesting input
InitializeInput:
PROC = {
input ← NEW[Input[initialTokens]];
input.scratch ← RefText.ObtainScratch[initialBackingChars];
input.backing ← RefText.AppendChar[input.scratch, sentinel];
input.nTokens ← 0;
AddToken[[0, sep[sentinel]]];
};
FinalizeInput: PROC = {RefText.ReleaseScratch[input.scratch]};
state: {initial, num, alpha, eof} ← $initial;
GetToken:
PROC [ti: TokenIndex]
RETURNS [Token] = {
UNTIL input.nTokens > ti
DO
IF state = $eof THEN RETURN [input.tokens[input.nTokens-1]];
ReadMoreTokens[];
ENDLOOP;
RETURN[input.tokens[ti]]
};
ReadMoreTokens:
PROC = {
i: NAT;
GetChar:
PROC
RETURNS [char:
CHAR] = {
char ← IF s.EndOf[] THEN sentinel ELSE s.GetChar[];
i ← input.backing.length;
input.backing ← RefText.AppendChar[input.backing, char];
};
Backup:
PROC = {
s.Backup[input.backing[i ← input.backing.length.PRED]];
input.backing.length ← i;
};
tStart: NAT;
IF state ~= $initial THEN ERROR;
DO
char: CHAR ← GetChar[];
SELECT char
FROM
Ascii.TAB => char ← Ascii.SP;
IN [40C..176C] => NULL;
ENDCASE => --illegal; terminate scan-- char ← sentinel;
SELECT char
FROM
IN ['0..'9] =>
SELECT state
FROM
$initial => {state ← $num; tStart ← i};
$num => NULL;
$alpha => {AddToken[[tStart, alpha[i-tStart]]]; Backup[]; EXIT};
ENDCASE;
',, '., ':, '/, '-, '+, Ascii.
SP, sentinel =>
SELECT state
FROM
$initial =>
WITH t: input.tokens[input.nTokens-1]
SELECT
FROM
sep => {
The effect of the following is to ignore spaces as separators unless there is nothing else, and to terminate the scan if two separator characters are adjacent.
IF t.char = Ascii.SP THEN t.char ← char
ELSE IF char ~= Ascii.SP THEN --illegal-- t.char ← char ← sentinel;
IF char = sentinel THEN {state ← $eof; EXIT};
};
ENDCASE => {
AddToken[[i, sep[char]]];
IF char = sentinel THEN state ← $eof;
EXIT
};
$num => {AddToken[[tStart, num[i-tStart]]]; AddToken[[i, sep[char]]]; EXIT};
$alpha => {AddToken[[tStart, alpha[i-tStart]]]; AddToken[[i, sep[char]]]; EXIT};
ENDCASE;
ENDCASE =>
SELECT state
FROM
$initial => {state ← $alpha; tStart ← i};
$num => {AddToken[[tStart, num[i-tStart]]]; Backup[]; EXIT};
$alpha => NULL;
ENDCASE;
ENDLOOP;
IF state ~= $eof THEN state ← $initial;
};
AddToken:
PROC [t: Token] = {
The TRUSTED statements in this procedure are actually SAFE, but the compiler is too conservative in its treatment of variant records. Our Tokens contain no REFs, so we should be able to manipulate them freely without compromising the integrity of storage. Unfortunately, the compiler doesn't understand that.
IF input.nTokens = input.length
THEN {
newInput: REF Input ← NEW[Input[(input.length*3)/2]];
newInput.nTokens ← input.nTokens;
newInput.backing ← input.backing;
FOR i:
NAT
IN [0..input.nTokens)
DO
TRUSTED{newInput.tokens[i] ← input.tokens[i]};
ENDLOOP;
input ← newInput;
}
ELSE {TRUSTED{input.tokens[input.nTokens] ← t}; input.nTokens ← input.nTokens.SUCC};
};
BackupStream:
PROC [to: CharIndex] = {
FOR i: CharIndex
DECREASING
IN [to..input.backing.length)
DO
char: CHAR = input.backing[i];
IF char ~= sentinel THEN s.Backup[char];
ENDLOOP;
};
***************
*
* Weekday Parser
*
***************
ParseWeekday:
PROC [first: TokenIndex]
RETURNS [next: TokenIndex] = {
next ← first;
WITH GetToken[first]
SELECT
FROM
t: alpha Token => {
DayOfWeek: TYPE = BasicTime.DayOfWeek[Monday..Sunday];
weekdays:
ARRAY DayOfWeek
OF
ROPE = [
"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"];
a:
ROPE =
Rope.Substr[base: RefText.TrustTextAsRope[input.backing], start: t.offset, len: t.length];
IF t.length >= 2
THEN
FOR i: DayOfWeek
IN DayOfWeek
DO
IF a.Equal[s2: weekdays[i].Substr[len: t.length], case:
FALSE]
THEN
RETURN[ConsumeSPOrComma[first.SUCC]];
ENDLOOP;
};
ENDCASE;
};
***************
*
* Date Parser
*
***************
ParseDate:
PROC [first: TokenIndex]
RETURNS [next: TokenIndex] = {
ENABLE OutOfRange => ERROR Unintelligible[pos];
BogusMonth: ERROR = CODE;
ParseSymbolicMonth:
PROC [t: alpha Token]
RETURNS [month: MonthOfYear] = {
MonthNames: TYPE = ARRAY MonthOfYear OF ROPE;
m:
ROPE =
Rope.Substr[base: RefText.TrustTextAsRope[input.backing], start: t.offset, len: t.length];
SELECT Ascii.Upper[input.backing[t.offset]]
FROM
'I, 'V, 'X => {
roman: MonthNames ← [
"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X", "XI", "XII"];
FOR month
IN MonthOfYear
DO
IF m.Equal[s2: roman[month], case: FALSE] THEN RETURN;
ENDLOOP;
};
ENDCASE => {
english: MonthNames ← [
"January", "February", "March", "April", "May", "June",
"July", "August", "September", "October", "November", "December"];
IF t.length >= 3
THEN
FOR month
IN MonthOfYear
DO
IF m.Equal[s2: english[month].Substr[len: t.length], case: FALSE] THEN RETURN;
ENDLOOP;
};
ERROR BogusMonth
};
CollectYear:
PROC [y: num Token]
RETURNS [Year] = {
This assumes Year.FIRST is in the 20th century
RETURN[1900+CollectValue[y, Year.FIRST-1900, 99 ! OutOfRange => CONTINUE]];
RETURN[CollectValue[y, Year.FIRST, Year.LAST]]
};
CollectMonth:
PROC [m: num Token]
RETURNS [MonthOfYear] =
INLINE
{RETURN[VAL[CollectValue[m, Month.FIRST, Month.LAST]-Month.FIRST]]};
daysForMonth: ARRAY MonthOfYear OF NAT = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
CollectDay:
PROC [d: num Token, m: MonthOfYear]
RETURNS [Day] =
INLINE
{RETURN[CollectValue[d, Day.FIRST, daysForMonth[m]]]};
second, third: TokenIndex;
t: num Token;
IF GetToken[first].type = sep THEN Bogus[first];
Assert: 'first' is alpha or num.
WITH GetToken[second𡤏irst.
SUCC]
SELECT
FROM
sep: sep Token => IF sep.char = sentinel THEN Bogus[second] ELSE second ← second.SUCC;
ENDCASE;
Assert: 'second' is alpha or num. (Reasoning: if first.SUCC was a separator other than the final sentinel, then it must be followed by at least one non-separator token, since two separators cannot be adjacent, and the token sequence always ends with a sentinel, which is a separator.
WITH GetToken[third←second.
SUCC]
SELECT
FROM
sep: sep Token => IF sep.char = sentinel THEN Bogus[third] ELSE third ← third.SUCC;
ENDCASE;
Assert: 'third' is alpha or num. (Reasoning is similar to that for 'second'.) In all legal cases, 'third' must be numeric, so we check for that now.
WITH GetToken[third]
SELECT
FROM
y: num Token => t ← y;
ENDCASE => Bogus[third];
WITH GetToken[first]
SELECT
FROM
m: alpha Token => {
time.year ← CollectYear[t];
time.month ← ParseSymbolicMonth[m ! BogusMonth => Bogus[first]];
'first' corresponds to a valid month. We now require that 'second' be the day of the month and 'third' be the year.
WITH GetToken[second]
SELECT
FROM
d: num Token => time.day ← CollectDay[d, time.month];
ENDCASE => Bogus[second];
};
f: num Token =>
WITH GetToken[second]
SELECT
FROM
m: alpha Token => {
The second token is alpha, so we will require it to be the month.
time.month ← ParseSymbolicMonth[m ! BogusMonth => Bogus[second]];
Now we must decide if we have <day><month><year> or <year><month><day>.
time.day ← CollectDay[f, time.month ! OutOfRange => GO TO yearFirst];
time.year ← CollectYear[t];
EXITS
yearFirst => {time.year ← CollectYear[f]; time.day ← CollectDay[t, time.month]};
};
s: num Token => {
All three parts are numeric.
time.year ← CollectYear[t ! OutOfRange => GO TO yearFirst];
time.month ← CollectMonth[f ! OutOfRange => GO TO dayFirst];
time.day ← CollectDay[s, time.month];
EXITS
dayFirst => {
time.month ← CollectMonth[s];
time.day ← CollectDay[f, time.month];
};
yearFirst => {
time.year ← CollectYear[f];
time.month ← CollectMonth[s];
time.day ← CollectDay[t, time.month];
};
};
ENDCASE;
ENDCASE;
next ← third.SUCC;
Be sure we weren't slipped a bogus leap year...
IF time.month = February AND time.day = 29 AND ~LeapYear[time.year] THEN Bogus[next];
};
***************
*
* Time Parser
*
***************
ParseTime:
PROC [first: TokenIndex]
RETURNS [next: TokenIndex] = {
ENABLE OutOfRange => ERROR Unintelligible[pos];
CollectHour:
PROC [h: num Token]
RETURNS [Hour] =
INLINE
{RETURN[CollectValue[h, Hour.FIRST, Hour.LAST]]};
CollectMinute:
PROC [m: num Token]
RETURNS [Minute] =
INLINE
{RETURN[CollectValue[m, Minute.FIRST, Minute.LAST]]};
CollectSecond:
PROC [s: num Token]
RETURNS [Second] =
INLINE
{RETURN[CollectValue[s, Second.FIRST, Second.LAST]]};
AMorPM:
PROC [t: TokenIndex]
RETURNS [
BOOL] = {
WITH GetToken[t]
SELECT
FROM
m: alpha Token =>
IF m.length = 2
AND Ascii.Upper[input.backing[m.offset.
SUCC]] = 'M
THEN {
offset: Hour;
SELECT Ascii.Upper[input.backing[m.offset]]
FROM
'A => offset ← 0;
'P => offset ← 12;
ENDCASE => RETURN[FALSE];
IF ~(time.hour IN [1..12]) THEN Bogus[t];
time.hour ← (time.hour MOD 12) + offset;
RETURN[TRUE]
};
ENDCASE;
RETURN[FALSE]
};
n: num Token;
next ← first;
time.hour ← time.minute ← time.second ← 0;
WITH GetToken[first]
SELECT
FROM
f: num Token => n ← f;
f: sep Token => IF f.char = sentinel THEN --time omitted-- RETURN ELSE Bogus[first];
ENDCASE => Bogus[first];
next ← next.SUCC;
SELECT n.length
FROM
1, 2, 4, 6 => NULL;
ENDCASE => Bogus[first];
time.hour ← CollectHour[[n.offset, num[MIN[n.length, 2]]]];
IF n.length <= 2
THEN {
A separator must follow a 1- or 2- digit hour field.
WITH GetToken[next]
SELECT
FROM
sep: sep Token => IF sep.char = sentinel THEN Bogus[next];
ENDCASE => Bogus[next];
Assert: 'next' is a non-sentinel separator. Therefore, 'next.SUCC' is alpha or num.
next ← next.SUCC;
WITH GetToken[next]
SELECT
FROM
s: num Token => n ← s;
ENDCASE => Bogus[next];
SELECT n.length
FROM
2, 4 => next ← next.SUCC;
ENDCASE => Bogus[next];
}
ELSE {n.offset ← n.offset + 2; n.length ← n.length - 2};
time.minute ← CollectMinute[[n.offset, num[2]]];
Assert: 'next' indexes the token following the minutes.
IF n.length > 2
THEN
The minutes and seconds are concatenated.
time.second ← CollectSecond[[n.offset+2, num[2]]]
ELSE
Now look for optional seconds field. We assume it is present if there is a colon separator following the minutes.
WITH GetToken[next]
SELECT
FROM
sep: sep Token =>
IF sep.char = ':
THEN
Assert: 'next.SUCC' is alpha or num.
WITH GetToken[next.
SUCC]
SELECT
FROM
s: num Token => {time.second ← CollectSecond[s]; next ← next + 2};
ENDCASE => Bogus[next.SUCC];
ENDCASE;
Assert: 'next' indexes the token following the last time part (minutes or seconds).
WITH GetToken[next]
SELECT
FROM
s: sep Token => IF s.char ~= sentinel AND AMorPM[next.SUCC] THEN next ← next + 2;
a: alpha Token => IF AMorPM[next] THEN next ← next.SUCC;
ENDCASE;
};
***************
*
* Zone Parser
*
***************
ParseZone:
PROC [first: TokenIndex]
RETURNS [next: TokenIndex] = {
ENABLE OutOfRange => ERROR Unintelligible[pos];
BadZone: ERROR = CODE;
dst: BOOL ← FALSE;
CollectSymbolic:
PROC [z: alpha Token]
RETURNS [DeltaMinutes] = {
char: CHAR = Ascii.Upper[input.backing[z.offset]];
SELECT z.length
FROM
1 => {
zones:
PACKED
ARRAY ZoneIndex
OF
CHAR = [
'Y, 'X, 'W, 'V, 'U, 'T, 'S, 'R, 'Q, 'P, 'O, 'N,
'Z,
'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'K, 'L, 'M
];
FOR hour: ZoneIndex
IN ZoneIndex
DO
IF char = zones[hour] THEN RETURN[hour*minutesPerHour];
ENDLOOP;
};
2 => IF char = 'U AND Ascii.Upper[input.backing[z.offset+1]] = 'T THEN RETURN[0];
3 => {
naZones: PACKED ARRAY NAZones OF CHAR = ['A, 'E, 'C, 'M, 'P, 'Y, 'H, 'B];
IF Ascii.Upper[input.backing[z.offset+2]] = 'T
THEN {
SELECT Ascii.Upper[input.backing[z.offset.
SUCC]]
FROM
'S, 'M => NULL; -- treat "mean" same as "standard"
'D => dst ← TRUE;
ENDCASE;
SELECT char
FROM
'G => IF ~dst THEN RETURN[0];
'N => IF ~dst THEN RETURN[3*minutesPerHour+30];
ENDCASE =>
FOR hour: NAZones
IN NAZones
DO
IF char = naZones[hour] THEN RETURN[hour*minutesPerHour];
ENDLOOP;
};
};
ENDCASE;
ERROR BadZone
};
CollectAbsolute:
PROC [z: num Token]
RETURNS [DeltaMinutes] = {
hour: Hour;
hourLength: NAT;
minute: Minute ← 0;
IF ~(z.length IN [1..4]) THEN ERROR OutOfRange[z.offset+z.length];
hourLength ← 2 - (z.length MOD 2);
hour ← CollectValue[[z.offset, num[hourLength]], Hour.FIRST, Hour.LAST];
IF z.length > 2
THEN
minute ← CollectValue[[z.offset+hourLength, num[2]], Minute.FIRST, Minute.LAST];
RETURN[hour*minutesPerHour+minute]
};
WITH GetToken[next ← first]
SELECT
FROM
z: sep Token =>
SELECT z.char
FROM
Ascii.
SP, '+, '- => {
WITH GetToken[next.
SUCC]
SELECT
FROM
zone: num Token =>
SELECT z.char
FROM
Ascii.SP => GO TO noZone; -- numeric must be preceded by + or -
'+ => time.zone ← CollectAbsolute[zone];
'- => time.zone ← -CollectAbsolute[zone];
ENDCASE; -- can't happen
zone: alpha Token =>
IF z.char = '+ THEN Bogus[next.SUCC]
ELSE
time.zone ← CollectSymbolic[zone ! BadZone =>
IF z.char = Ascii.SP THEN GO TO noZone ELSE Bogus[next.SUCC]];
ENDCASE; -- can't happen
next ← next.SUCC;
};
ENDCASE => GO TO noZone; -- includes sentinel
z: alpha Token => time.zone ← CollectSymbolic[z ! BadZone => GO TO noZone];
ENDCASE => GO TO noZone;
next ← next.SUCC;
time.dst ← IF dst THEN yes ELSE no;
};
***************
*
* Utilities
*
***************
Bogus: PROC [ti: TokenIndex] = {ERROR Unintelligible[input.tokens[ti].offset]};
OutOfRange: ERROR [pos: CharIndex] = CODE;
CollectValue:
PROC [t: num Token, low, high:
NAT]
RETURNS [value:
NAT ← 0] = {
FOR j: CharIndex
IN [t.offset..t.offset+t.length)
DO
We would like to catch a bounds fault here and turn it into OutOfRange. Unfortunately, the current implementation of catch phrases doesn't support this, since a catch phrase is only attached to procedure calls. Accordingly, we have to do the test by hand. The actual test is a bit too stringent, but it doesn't really matter.
IF value > NAT.LAST/10 - 1 THEN ERROR OutOfRange[j];
value ← value * 10 + (input.backing[j] - '0);
ENDLOOP;
IF ~(value IN [low..high]) THEN ERROR OutOfRange[t.offset+t.length];
};
LeapYear:
PROC [year: Year]
RETURNS [
BOOL] = {
RETURN[year MOD 4 = 0 AND (year MOD 100 ~= 0 OR year MOD 400 = 0)]};
ConsumeSPOrComma:
PROC [t: TokenIndex]
RETURNS [TokenIndex] = {
WITH GetToken[t]
SELECT
FROM
s: sep Token => SELECT s.char FROM Ascii.SP, ', => RETURN[t.SUCC]; ENDCASE;
ENDCASE;
RETURN[t]
};
***************
*
* Main Body
*
***************
nextToken: TokenIndex ← TokenIndex.FIRST.SUCC;
InitializeInput[];
BEGIN
ENABLE Unintelligible => {
BackupStream[to: pos]; FinalizeInput[]; ERROR IO.Error[SyntaxError, s]};
BEGIN
nextToken ← ParseWeekday[nextToken];
nextToken ← ParseDate[nextToken ! Unintelligible => GO TO tryTimeFirst];
nextToken ← ParseTime[ConsumeSPOrComma[nextToken] ! Unintelligible => GO TO dateOnly];
nextToken ← ParseZone[nextToken];
EXITS
dateOnly => NULL;
tryTimeFirst => {
nextToken ← ParseTime[nextToken];
nextToken ← ParseZone[nextToken];
nextToken ← ParseDate[ConsumeSPOrComma[nextToken] ! Unintelligible => CONTINUE];
};
END;
END;
BackupStream[to: input.tokens[nextToken].offset];
FinalizeInput[];
};