<> <> <> <> <> DIRECTORY Ascii USING [SP, TAB, Upper], BasicTime USING [DayOfWeek, hoursPerDay, minutesPerHour, MonthOfYear, secondsPerMinute, Unpacked], IO USING [Backup, EndOf, Error, GetChar, STREAM], RefText USING [AppendChar, ObtainScratch, ReleaseScratch, TrustTextAsRope], Rope USING [Equal, ROPE, Substr]; IODateAndTimeImpl: CEDAR PROGRAM IMPORTS Ascii, IO, RefText, Rope EXPORTS IO = BEGIN hoursPerDay: NAT = BasicTime.hoursPerDay; minutesPerHour: NAT = BasicTime.minutesPerHour; secondsPerMinute: NAT = BasicTime.secondsPerMinute; secondsPerDay: INT = hoursPerDay.LONG*minutesPerHour*secondsPerMinute; baseYear: NAT = 1968; EpochDays: TYPE = INT[0..LAST[LONG CARDINAL]/secondsPerDay]; <> Year: TYPE = NAT[baseYear..MIN[baseYear+(LAST[EpochDays]/(4*365+1))*4, 2036--BasicTime's limit--]]; Month: TYPE = NAT[1..12]; MonthOfYear: TYPE = BasicTime.MonthOfYear[January..December]; Day: TYPE = NAT[1..31]; Hour: TYPE = NAT[0..hoursPerDay); Minute: TYPE = NAT[0..minutesPerHour); Second: TYPE = NAT[0..secondsPerMinute); DeltaMinutes: TYPE = INTEGER[-hoursPerDay*minutesPerHour..hoursPerDay*minutesPerHour]; <> ZoneIndex: TYPE = INT [-12..12]; NAZones: TYPE = ZoneIndex[4..11]; ROPE: TYPE = Rope.ROPE; initialTokens: NAT = 20; initialBackingChars: NAT = 100; TokenCount: TYPE = NAT; TokenIndex: TYPE = NAT[TokenCount.FIRST..TokenCount.LAST); CharIndex: TYPE = NAT; CharCount: TYPE = NAT[0..37777B]; -- this upper bound saves a word in Token Input: TYPE = RECORD [ nTokens: TokenCount, scratch: REF TEXT, backing: REF TEXT, tokens: SEQUENCE length: TokenCount OF Token ]; Token: TYPE = RECORD [ offset: CharIndex, kind: SELECT type: * FROM alpha => [length: CharCount], num => [length: CharCount], sep => [char: CHAR], ENDCASE ]; sentinel: CHAR = 200C; GetUnpackedTime: PUBLIC PROC [s: IO.STREAM] RETURNS [time: BasicTime.Unpacked _ []] = { Unintelligible: ERROR [pos: NAT] = CODE; input: REF Input; <<***************>> <<*>> <<* Scanner>> <<*>> <<***************>> <> <> <> <> InitializeInput: PROC = { input _ NEW[Input[initialTokens]]; input.scratch _ RefText.ObtainScratch[initialBackingChars]; input.backing _ RefText.AppendChar[input.scratch, sentinel]; input.nTokens _ 0; AddToken[[0, sep[sentinel]]]; }; FinalizeInput: PROC = {RefText.ReleaseScratch[input.scratch]}; state: {initial, num, alpha, eof} _ $initial; GetToken: PROC [ti: TokenIndex] RETURNS [Token] = { UNTIL input.nTokens > ti DO IF state = $eof THEN RETURN [input.tokens[input.nTokens-1]]; ReadMoreTokens[]; ENDLOOP; RETURN[input.tokens[ti]] }; ReadMoreTokens: PROC = { i: NAT; GetChar: PROC RETURNS [char: CHAR] = { char _ IF s.EndOf[] THEN sentinel ELSE s.GetChar[]; i _ input.backing.length; input.backing _ RefText.AppendChar[input.backing, char]; }; Backup: PROC = { s.Backup[input.backing[i _ input.backing.length.PRED]]; input.backing.length _ i; }; tStart: NAT; IF state ~= $initial THEN ERROR; DO char: CHAR _ GetChar[]; SELECT char FROM IN ['0..'9] => SELECT state FROM $initial => {state _ $num; tStart _ i}; $num => NULL; $alpha => {AddToken[[tStart, alpha[i-tStart]]]; Backup[]; EXIT}; ENDCASE; IN ['A..'Z], IN ['a..'z] => SELECT state FROM $initial => {state _ $alpha; tStart _ i}; $num => {AddToken[[tStart, num[i-tStart]]]; Backup[]; EXIT}; $alpha => NULL; ENDCASE; ENDCASE => { SELECT char FROM ',, '., ':, '/, '-, '+, Ascii.SP => NULL; Ascii.TAB => char _ Ascii.SP; ENDCASE => char _ sentinel; SELECT state FROM $initial => WITH t: input.tokens[input.nTokens-1] SELECT FROM sep => { <> IF t.char = Ascii.SP THEN t.char _ char ELSE IF char ~= Ascii.SP THEN --illegal-- t.char _ char _ sentinel; IF char = sentinel THEN {state _ $eof; EXIT}; }; ENDCASE => { AddToken[[i, sep[char]]]; IF char = sentinel THEN state _ $eof; EXIT }; $num => {AddToken[[tStart, num[i-tStart]]]; AddToken[[i, sep[char]]]; EXIT}; $alpha => {AddToken[[tStart, alpha[i-tStart]]]; AddToken[[i, sep[char]]]; EXIT}; ENDCASE; }; ENDLOOP; IF state ~= $eof THEN state _ $initial; }; AddToken: PROC [t: Token] = { <> IF input.nTokens = input.length THEN { newInput: REF Input _ NEW[Input[(input.length*3)/2]]; newInput.nTokens _ input.nTokens; newInput.backing _ input.backing; FOR i: NAT IN [0..input.nTokens) DO TRUSTED{newInput.tokens[i] _ input.tokens[i]}; ENDLOOP; input _ newInput; } ELSE {TRUSTED{input.tokens[input.nTokens] _ t}; input.nTokens _ input.nTokens.SUCC}; }; BackupStream: PROC [to: CharIndex] = { FOR i: CharIndex DECREASING IN [to..input.backing.length) DO char: CHAR = input.backing[i]; IF char ~= sentinel THEN s.Backup[char]; ENDLOOP; }; <<***************>> <<*>> <<* Weekday Parser>> <<*>> <<***************>> ParseWeekday: PROC [first: TokenIndex] RETURNS [next: TokenIndex] = { next _ first; WITH GetToken[first] SELECT FROM t: alpha Token => { DayOfWeek: TYPE = BasicTime.DayOfWeek[Monday..Sunday]; weekdays: ARRAY DayOfWeek OF ROPE = [ "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]; a: ROPE = Rope.Substr[base: RefText.TrustTextAsRope[input.backing], start: t.offset, len: t.length]; IF t.length >= 2 THEN FOR i: DayOfWeek IN DayOfWeek DO IF a.Equal[s2: weekdays[i].Substr[len: t.length], case: FALSE] THEN RETURN[ConsumeSPOrComma[first.SUCC]]; ENDLOOP; }; ENDCASE; }; <<***************>> <<*>> <<* Date Parser>> <<*>> <<***************>> ParseDate: PROC [first: TokenIndex] RETURNS [next: TokenIndex] = { ENABLE OutOfRange => ERROR Unintelligible[pos]; BogusMonth: ERROR = CODE; ParseSymbolicMonth: PROC [t: alpha Token] RETURNS [month: MonthOfYear] = { MonthNames: TYPE = ARRAY MonthOfYear OF ROPE; m: ROPE = Rope.Substr[base: RefText.TrustTextAsRope[input.backing], start: t.offset, len: t.length]; SELECT Ascii.Upper[input.backing[t.offset]] FROM 'I, 'V, 'X => { roman: MonthNames _ [ "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X", "XI", "XII"]; FOR month IN MonthOfYear DO IF m.Equal[s2: roman[month], case: FALSE] THEN RETURN; ENDLOOP; }; ENDCASE => { english: MonthNames _ [ "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]; IF t.length >= 3 THEN FOR month IN MonthOfYear DO IF m.Equal[s2: english[month].Substr[len: t.length], case: FALSE] THEN RETURN; ENDLOOP; }; ERROR BogusMonth }; CollectYear: PROC [y: num Token] RETURNS [Year] = { <> RETURN[1900+CollectValue[y, Year.FIRST-1900, 99 ! OutOfRange => CONTINUE]]; RETURN[CollectValue[y, Year.FIRST, Year.LAST]] }; CollectMonth: PROC [m: num Token] RETURNS [MonthOfYear] = INLINE {RETURN[VAL[CollectValue[m, Month.FIRST, Month.LAST]-Month.FIRST]]}; daysForMonth: ARRAY MonthOfYear OF NAT = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]; CollectDay: PROC [d: num Token, m: MonthOfYear] RETURNS [Day] = INLINE {RETURN[CollectValue[d, Day.FIRST, daysForMonth[m]]]}; second, third: TokenIndex; t: num Token; IF GetToken[first].type = sep THEN Bogus[first]; <> WITH GetToken[second_first.SUCC] SELECT FROM sep: sep Token => IF sep.char = sentinel THEN Bogus[second] ELSE second _ second.SUCC; ENDCASE; <> WITH GetToken[third_second.SUCC] SELECT FROM sep: sep Token => IF sep.char = sentinel THEN Bogus[third] ELSE third _ third.SUCC; ENDCASE; <> WITH GetToken[third] SELECT FROM y: num Token => t _ y; ENDCASE => Bogus[third]; WITH GetToken[first] SELECT FROM m: alpha Token => { time.year _ CollectYear[t]; time.month _ ParseSymbolicMonth[m ! BogusMonth => Bogus[first]]; <<'first' corresponds to a valid month. We now require that 'second' be the day of the month and 'third' be the year.>> WITH GetToken[second] SELECT FROM d: num Token => time.day _ CollectDay[d, time.month]; ENDCASE => Bogus[second]; }; f: num Token => WITH GetToken[second] SELECT FROM m: alpha Token => { <> time.month _ ParseSymbolicMonth[m ! BogusMonth => Bogus[second]]; < or .>> time.day _ CollectDay[f, time.month ! OutOfRange => GO TO yearFirst]; time.year _ CollectYear[t]; EXITS yearFirst => {time.year _ CollectYear[f]; time.day _ CollectDay[t, time.month]}; }; s: num Token => { <> time.year _ CollectYear[t ! OutOfRange => GO TO yearFirst]; time.month _ CollectMonth[f ! OutOfRange => GO TO dayFirst]; time.day _ CollectDay[s, time.month]; EXITS dayFirst => { time.month _ CollectMonth[s]; time.day _ CollectDay[f, time.month]; }; yearFirst => { time.year _ CollectYear[f]; time.month _ CollectMonth[s]; time.day _ CollectDay[t, time.month]; }; }; ENDCASE; ENDCASE; next _ third.SUCC; <> IF time.month = February AND time.day = 29 AND ~LeapYear[time.year] THEN Bogus[next]; }; <<***************>> <<*>> <<* Time Parser>> <<*>> <<***************>> ParseTime: PROC [first: TokenIndex] RETURNS [next: TokenIndex] = { ENABLE OutOfRange => ERROR Unintelligible[pos]; CollectHour: PROC [h: num Token] RETURNS [Hour] = INLINE {RETURN[CollectValue[h, Hour.FIRST, Hour.LAST]]}; CollectMinute: PROC [m: num Token] RETURNS [Minute] = INLINE {RETURN[CollectValue[m, Minute.FIRST, Minute.LAST]]}; CollectSecond: PROC [s: num Token] RETURNS [Second] = INLINE {RETURN[CollectValue[s, Second.FIRST, Second.LAST]]}; AMorPM: PROC [t: TokenIndex] RETURNS [BOOL] = { WITH GetToken[t] SELECT FROM m: alpha Token => IF m.length = 2 AND Ascii.Upper[input.backing[m.offset.SUCC]] = 'M THEN { offset: Hour; SELECT Ascii.Upper[input.backing[m.offset]] FROM 'A => offset _ 0; 'P => offset _ 12; ENDCASE => RETURN[FALSE]; IF ~(time.hour IN [1..12]) THEN Bogus[t]; time.hour _ (time.hour MOD 12) + offset; RETURN[TRUE] }; ENDCASE; RETURN[FALSE] }; n: num Token; next _ first; time.hour _ time.minute _ time.second _ 0; WITH GetToken[first] SELECT FROM f: num Token => n _ f; f: sep Token => IF f.char = sentinel THEN --time omitted-- RETURN ELSE Bogus[first]; ENDCASE => Bogus[first]; next _ next.SUCC; SELECT n.length FROM 1, 2, 4, 6 => NULL; ENDCASE => Bogus[first]; time.hour _ CollectHour[[n.offset, num[MIN[n.length, 2]]]]; IF n.length <= 2 THEN { <> WITH GetToken[next] SELECT FROM sep: sep Token => IF sep.char = sentinel THEN Bogus[next]; ENDCASE => Bogus[next]; <> next _ next.SUCC; WITH GetToken[next] SELECT FROM s: num Token => n _ s; ENDCASE => Bogus[next]; SELECT n.length FROM 2, 4 => next _ next.SUCC; ENDCASE => Bogus[next]; } ELSE {n.offset _ n.offset + 2; n.length _ n.length - 2}; time.minute _ CollectMinute[[n.offset, num[2]]]; <> IF n.length > 2 THEN <> time.second _ CollectSecond[[n.offset+2, num[2]]] ELSE <> WITH GetToken[next] SELECT FROM sep: sep Token => IF sep.char = ': THEN <> WITH GetToken[next.SUCC] SELECT FROM s: num Token => {time.second _ CollectSecond[s]; next _ next + 2}; ENDCASE => Bogus[next.SUCC]; ENDCASE; <> WITH GetToken[next] SELECT FROM s: sep Token => IF s.char ~= sentinel AND AMorPM[next.SUCC] THEN next _ next + 2; a: alpha Token => IF AMorPM[next] THEN next _ next.SUCC; ENDCASE; }; <<***************>> <<*>> <<* Zone Parser>> <<*>> <<***************>> ParseZone: PROC [first: TokenIndex] RETURNS [next: TokenIndex] = { ENABLE OutOfRange => ERROR Unintelligible[pos]; BadZone: ERROR = CODE; dst: BOOL _ FALSE; CollectSymbolic: PROC [z: alpha Token] RETURNS [DeltaMinutes] = { char: CHAR = Ascii.Upper[input.backing[z.offset]]; SELECT z.length FROM 1 => { zones: PACKED ARRAY ZoneIndex OF CHAR = [ 'Y, 'X, 'W, 'V, 'U, 'T, 'S, 'R, 'Q, 'P, 'O, 'N, 'Z, 'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'K, 'L, 'M ]; FOR hour: ZoneIndex IN ZoneIndex DO IF char = zones[hour] THEN RETURN[hour*minutesPerHour]; ENDLOOP; }; 2 => IF char = 'U AND Ascii.Upper[input.backing[z.offset+1]] = 'T THEN RETURN[0]; 3 => { naZones: PACKED ARRAY NAZones OF CHAR = ['A, 'E, 'C, 'M, 'P, 'Y, 'H, 'B]; IF Ascii.Upper[input.backing[z.offset+2]] = 'T THEN { SELECT Ascii.Upper[input.backing[z.offset.SUCC]] FROM 'S, 'M => NULL; -- treat "mean" same as "standard" 'D => dst _ TRUE; ENDCASE; SELECT char FROM 'G => IF ~dst THEN RETURN[0]; 'N => IF ~dst THEN RETURN[3*minutesPerHour+30]; ENDCASE => FOR hour: NAZones IN NAZones DO IF char = naZones[hour] THEN RETURN[hour*minutesPerHour]; ENDLOOP; }; }; ENDCASE; ERROR BadZone }; CollectAbsolute: PROC [z: num Token] RETURNS [DeltaMinutes] = { hour: Hour; hourLength: NAT; minute: Minute _ 0; IF ~(z.length IN [1..4]) THEN ERROR OutOfRange[z.offset+z.length]; hourLength _ 2 - (z.length MOD 2); hour _ CollectValue[[z.offset, num[hourLength]], Hour.FIRST, Hour.LAST]; IF z.length > 2 THEN minute _ CollectValue[[z.offset+hourLength, num[2]], Minute.FIRST, Minute.LAST]; RETURN[hour*minutesPerHour+minute] }; WITH GetToken[next _ first] SELECT FROM z: sep Token => SELECT z.char FROM Ascii.SP, '+, '- => { WITH GetToken[next.SUCC] SELECT FROM zone: num Token => SELECT z.char FROM Ascii.SP => GO TO noZone; -- numeric must be preceded by + or - '+ => time.zone _ CollectAbsolute[zone]; '- => time.zone _ -CollectAbsolute[zone]; ENDCASE; -- can't happen zone: alpha Token => IF z.char = '+ THEN Bogus[next.SUCC] ELSE time.zone _ CollectSymbolic[zone ! BadZone => IF z.char = Ascii.SP THEN GO TO noZone ELSE Bogus[next.SUCC]]; ENDCASE; -- can't happen next _ next.SUCC; }; ENDCASE => GO TO noZone; -- includes sentinel z: alpha Token => time.zone _ CollectSymbolic[z ! BadZone => GO TO noZone]; ENDCASE => GO TO noZone; next _ next.SUCC; time.dst _ IF dst THEN yes ELSE no; EXITS noZone => NULL; }; <<***************>> <<*>> <<* Utilities>> <<*>> <<***************>> Bogus: PROC [ti: TokenIndex] = {ERROR Unintelligible[input.tokens[ti].offset]}; OutOfRange: ERROR [pos: CharIndex] = CODE; CollectValue: PROC [t: num Token, low, high: NAT] RETURNS [value: NAT _ 0] = { FOR j: CharIndex IN [t.offset..t.offset+t.length) DO <> IF value > NAT.LAST/10 - 1 THEN ERROR OutOfRange[j]; value _ value * 10 + (input.backing[j] - '0); ENDLOOP; IF ~(value IN [low..high]) THEN ERROR OutOfRange[t.offset+t.length]; }; LeapYear: PROC [year: Year] RETURNS [BOOL] = { RETURN[year MOD 4 = 0 AND (year MOD 100 ~= 0 OR year MOD 400 = 0)]}; ConsumeSPOrComma: PROC [t: TokenIndex] RETURNS [TokenIndex] = { WITH GetToken[t] SELECT FROM s: sep Token => SELECT s.char FROM Ascii.SP, ', => RETURN[t.SUCC]; ENDCASE; ENDCASE; RETURN[t] }; <<***************>> <<*>> <<* Main Body>> <<*>> <<***************>> nextToken: TokenIndex _ TokenIndex.FIRST.SUCC; InitializeInput[]; BEGIN ENABLE Unintelligible => { BackupStream[to: pos]; FinalizeInput[]; ERROR IO.Error[SyntaxError, s]}; BEGIN nextToken _ ParseWeekday[nextToken]; nextToken _ ParseDate[nextToken ! Unintelligible => GO TO tryTimeFirst]; nextToken _ ParseTime[ConsumeSPOrComma[nextToken] ! Unintelligible => GO TO dateOnly]; nextToken _ ParseZone[nextToken]; EXITS dateOnly => NULL; tryTimeFirst => { nextToken _ ParseTime[nextToken]; nextToken _ ParseZone[nextToken]; nextToken _ ParseDate[ConsumeSPOrComma[nextToken] ! Unintelligible => CONTINUE]; }; END; END; BackupStream[to: input.tokens[nextToken].offset]; FinalizeInput[]; }; END.