<<>> <> <> <> <> <<>> <<>> <> <> <<>> DIRECTORY Ascii USING [Upper], Atom USING [GetProp, PutProp], BasicTime USING [GMT, Period], Convert USING [Error, IntFromRope], IO USING [BreakProc, EndOfStream, GetTokenRope, GetLineRope, TokenProc, GetChar, PeekChar, GetRopeLiteral, Error, STREAM, RIS], RegularExpression USING [CreateFromRope, Finder, SearchRope], Rope USING [Cat, Compare, Equal, Find, Index, Fetch, Length, ROPE, SkipTo, Substr], Soundex USING [NameToCode, SoundexCode], SymTab USING [Create, Delete, EachPairAction, Fetch, Pairs, Ref, Store], Tempus USING [Adjust, MakeRope, Parse, Precision, Unintelligible], PatternMatch; PatternMatchImpl: CEDAR PROGRAM IMPORTS Ascii, Atom, BasicTime, Convert, IO, RegularExpression, Rope, Soundex, SymTab, Tempus EXPORTS PatternMatch ~ BEGIN ROPE: TYPE ~ Rope.ROPE; Equal: PUBLIC PatternMatch.MatchProc = { <<[value: ROPE, pattern: ROPE, pparsed: REF _ NIL] RETURNS [match: BOOLEAN, nothingGreater: BOOLEAN _ FALSE, pparsedNew: REF _ NIL]>> <> SELECT Rope.Compare[s1: value, s2: pattern, case: FALSE] FROM less => match ¬ FALSE; equal => match ¬ TRUE; greater => {match ¬ FALSE; nothingGreater ¬ TRUE}; ENDCASE => ERROR; pparsedNew ¬ $None; }; Prefix: PUBLIC PatternMatch.MatchProc = { <<[value: ROPE, pattern: ROPE, pparsed: REF _ NIL] RETURNS [match: BOOLEAN, nothingGreater: BOOLEAN _ FALSE, pparsedNew: REF _ NIL]>> <> SELECT Rope.Compare[s1: Rope.Substr[value, 0, Rope.Length[pattern]], s2: pattern, case: FALSE] FROM less => match ¬ FALSE; equal => match ¬ TRUE; greater => {match ¬ FALSE; nothingGreater ¬ TRUE} ENDCASE => ERROR; pparsedNew ¬ $None; }; Wildcard: PUBLIC PatternMatch.MatchProc = { <<[value: ROPE, pattern: ROPE, pparsed: REF _ NIL] RETURNS [match: BOOLEAN, nothingGreater: BOOLEAN _ FALSE, pparsedNew: REF _ NIL]>> <> <> SubMatch: PROC [pstart: INT, plen: INT, vstart: INT, vlen: INT] RETURNS [match: BOOLEAN, nothingGreater: BOOLEAN ¬ FALSE] = { <> pchar, vchar: CHAR; WHILE plen > 0 DO pchar ¬ Ascii.Upper[pattern.Fetch[pstart]]; IF pchar = '* THEN { <> IF plen = 1 THEN RETURN [match: TRUE, nothingGreater: FALSE]; <> { -- first, accept the * newpstart: INT = pstart + 1; newplen: INT = plen - 1; newvstart: INT ¬ vstart; newvlen: INT ¬ vlen; WHILE newvlen >= 0 DO IF SubMatch[newpstart, newplen, newvstart, newvlen].match THEN RETURN [match: TRUE, nothingGreater: FALSE]; newvstart ¬ newvstart + 1; newvlen ¬ newvlen - 1; ENDLOOP; }; RETURN [match: FALSE, nothingGreater: FALSE]; }; IF vlen = 0 THEN RETURN [match: FALSE, nothingGreater: FALSE]; <> vchar ¬ Ascii.Upper[value.Fetch[vstart]]; IF pchar # vchar THEN RETURN [match: FALSE, nothingGreater: vchar > pchar]; pstart ¬ pstart + 1; plen ¬ plen - 1; vstart ¬ vstart + 1; vlen ¬ vlen - 1; ENDLOOP; IF vlen = 0 THEN RETURN [match: TRUE, nothingGreater: FALSE] ELSE RETURN [match: FALSE, nothingGreater: TRUE]; }; [match, nothingGreater] ¬ SubMatch [0, pattern.Length[], 0, value.Length[]]; pparsedNew ¬ $None; }; <> PRe: TYPE ~ REF PReBody; PReBody: TYPE ~ RECORD[ pattern: ROPE, finder: RegularExpression.Finder, purePrefix: ROPE ¬ NIL -- used to see if something greater might match ]; RE: PUBLIC PatternMatch.MatchProc = { <<[value: ROPE, pattern: ROPE, pparsed: REF _ NIL] RETURNS [match: BOOLEAN, nothingGreater: BOOLEAN _ FALSE, pparsedNew: REF _ NIL]>> <> pr: PRe; WITH pparsed SELECT FROM ppr: PRe => pr ¬ ppr; ENDCASE => { -- pparsed is wrong type or NIL pr ¬ NEW[PReBody]; pr.pattern ¬ pattern; pr.finder ¬ RegularExpression.CreateFromRope[pattern: pattern, ignoreCase: TRUE, addBounds: TRUE]; }; pparsedNew ¬ pr; match ¬ RegularExpression.SearchRope[pr.finder, value].found; IF NOT match THEN { -- see if something greater might match IF pr.purePrefix = NIL THEN { i: INT; i ¬ Rope.SkipTo[s: pr.pattern, skip: "\'#[^$*+(\\<{!"]; -- look for special chars IF NOT i = Rope.Length[pr.pattern] AND Rope.Fetch[pr.pattern, i] = '* THEN -- could be zero of previous char i ¬ i-1; pr.purePrefix ¬ Rope.Substr[base: pr.pattern, len: i]; }; IF Rope.Compare[s1: Rope.Substr[value, 0, Rope.Length[pr.purePrefix]], s2: pr.purePrefix, case: FALSE] = greater THEN nothingGreater ¬ TRUE; }; }; PSound: TYPE ~ REF PSoundBody; PSoundBody: TYPE ~ RECORD[ pattern: ROPE, ch: CHAR, code: Soundex.SoundexCode ]; SoundexMatch: PUBLIC PatternMatch.MatchProc = { <<[value: ROPE, pattern: ROPE, pparsed: REF _ NIL] RETURNS [match: BOOLEAN, nothingGreater: BOOLEAN _ FALSE, pparsedNew: REF _ NIL]>> <> pr: PSound; WITH pparsed SELECT FROM ppr: PSound => pr ¬ ppr; ENDCASE => { -- pparsed is wrong type or NIL IF Rope.Equal[pattern, ""] THEN RETURN [FALSE, FALSE, NIL]; pr ¬ NEW[PSoundBody]; pr.pattern ¬ pattern; pr.ch ¬ Ascii.Upper[Rope.Fetch[pattern,0]]; pr.code ¬ Soundex.NameToCode[pattern]; }; pparsedNew ¬ pr; <> IF Rope.Equal[value, ""] THEN RETURN [FALSE, FALSE, pparsedNew]; SELECT Ascii.Upper[Rope.Fetch[value,0]] FROM = pr.ch => match ¬ Rope.Equal[Soundex.NameToCode[value], pr.code]; < pr.ch => match ¬ FALSE; > pr.ch => {match ¬ FALSE; nothingGreater ¬ TRUE}; ENDCASE => ERROR; }; SoundexPrefix: PUBLIC PatternMatch.MatchProc = { <<[value: ROPE, pattern: ROPE, pparsed: REF _ NIL] RETURNS [match: BOOLEAN, nothingGreater: BOOLEAN _ FALSE, pparsedNew: REF _ NIL]>> <> pr: PSound; WITH pparsed SELECT FROM ppr: PSound => pr ¬ ppr; ENDCASE => { -- pparsed is wrong type or NIL IF Rope.Equal[pattern, ""] THEN RETURN [FALSE, FALSE, NIL]; pr ¬ NEW[PSoundBody]; pr.pattern ¬ pattern; pr.ch ¬ Ascii.Upper[Rope.Fetch[pattern,0]]; pr.code ¬ Soundex.NameToCode[pattern]; pr.code ¬ Rope.Substr[base: pr.code, len: Rope.Index[s1: pr.code, s2: "0"]]; }; pparsedNew ¬ pr; <> IF Rope.Equal[value, ""] THEN RETURN [FALSE, FALSE, pparsedNew]; SELECT Ascii.Upper[Rope.Fetch[value,0]] FROM = pr.ch => match ¬ Rope.Equal[Rope.Substr[Soundex.NameToCode[value], 0, Rope.Length[pr.code]], pr.code]; < pr.ch => match ¬ FALSE; > pr.ch => {match ¬ FALSE; nothingGreater ¬ TRUE}; ENDCASE => ERROR; }; ParseSubrange: PROC [r: ROPE] RETURNS [start, end: ROPE] ~ { <> ENABLE IO.EndOfStream, IO.Error => GOTO Bad; ToDash: IO.BreakProc = { <<[char: CHAR] RETURNS [IO.CharClass]>> RETURN[SELECT char FROM '- => sepr, ENDCASE => other]; }; s: IO.STREAM ¬ IO.RIS[r]; start ¬ IF IO.PeekChar[s] = '" THEN IO.GetRopeLiteral[s] ELSE IO.GetTokenRope[s, ToDash].token; IF IO.GetChar[s] # '- THEN GOTO Bad; end ¬ IF IO.PeekChar[s] = '" THEN IO.GetRopeLiteral[s] ELSE IO.GetLineRope[s]; EXITS Bad => RETURN[NIL, NIL]; }; PSub: TYPE ~ REF PSubBody; PSubBody: TYPE ~ RECORD [ pattern: ROPE, start, end: ROPE ]; Subrange: PUBLIC PatternMatch.MatchProc ~ { <<[value: ROPE, pattern: ROPE, pparsed: REF _ NIL] RETURNS [match: BOOLEAN, nothingGreater: BOOLEAN _ FALSE, pparsedNew: REF _ NIL]>> <> pr: PSub; WITH pparsed SELECT FROM ppr: PSub => pr ¬ ppr; ENDCASE => { -- pparsed is wrong type or NIL pr ¬ NEW[PSubBody]; pr.pattern ¬ pattern; [pr.start, pr.end] ¬ ParseSubrange[pattern]; IF pr.start = NIL THEN GOTO BadPattern; IF Rope.Compare[s1: pr.start, s2: pr.end, case: FALSE] = greater THEN GOTO BadPattern; }; pparsedNew ¬ pr; <> SELECT Rope.Compare[s1: value, s2: pr.start, case: FALSE] FROM less => match ¬ FALSE; equal => match ¬ TRUE; greater => SELECT Rope.Compare[s1: Rope.Substr[value, 0, Rope.Length[pr.end]], s2: pr.end, case: FALSE] FROM less => match ¬ TRUE; equal => match ¬ TRUE; greater => {match ¬ FALSE; nothingGreater ¬ TRUE} ENDCASE => ERROR; ENDCASE => ERROR; EXITS BadPattern => RETURN[match: FALSE, nothingGreater: TRUE, pparsedNew: NIL]; }; PNumSub: TYPE ~ REF PNumSubBody; PNumSubBody: TYPE ~ RECORD [ pattern: ROPE, startInt, endInt: INT ]; NumSubrange: PUBLIC PatternMatch.MatchProc ~ { <<[value: ROPE, pattern: ROPE, pparsed: REF _ NIL] RETURNS [match: BOOLEAN, nothingGreater: BOOLEAN _ FALSE, pparsedNew: REF _ NIL]>> <> valueInt: INT; pr: PNumSub; WITH pparsed SELECT FROM ppr: PNumSub => pr ¬ ppr; ENDCASE => { -- pparsed is wrong type or NIL start, end: ROPE; pr ¬ NEW[PNumSubBody]; pr.pattern ¬ pattern; [start, end] ¬ ParseSubrange[pattern]; IF start = NIL THEN GOTO BadPattern; pr.startInt ¬ Convert.IntFromRope[start ! Convert.Error => GOTO BadPattern]; pr.endInt ¬ Convert.IntFromRope[end ! Convert.Error => GOTO BadPattern]; IF pr.startInt > pr.endInt THEN GOTO BadPattern; }; pparsedNew ¬ pr; <> valueInt ¬ Convert.IntFromRope[value ! Convert.Error => GOTO BadValue]; match ¬ pr.startInt <= valueInt AND valueInt <= pr.endInt; EXITS BadPattern => RETURN[match: FALSE, nothingGreater: TRUE, pparsedNew: NIL]; BadValue => RETURN[match: FALSE, nothingGreater: FALSE, pparsedNew: pparsedNew]; }; PDateSub: TYPE ~ REF PDateSubBody; PDateSubBody: TYPE ~ RECORD [ pattern: ROPE, precision: Tempus.Precision, startDate, endDate: BasicTime.GMT ]; DateSubrange: PUBLIC PatternMatch.MatchProc ~ { <<[value: ROPE, pattern: ROPE, pparsed: REF _ NIL] RETURNS [match: BOOLEAN, nothingGreater: BOOLEAN _ FALSE, pparsedNew: REF _ NIL]>> <> <> valueDate: BasicTime.GMT; pr: PDateSub; WITH pparsed SELECT FROM ppr: PDateSub => pr ¬ ppr; ENDCASE => { -- pparsed is wrong type or NIL start, end: Rope.ROPE; pr ¬ NEW[PDateSubBody]; pr.pattern ¬ pattern; [start, end] ¬ ParseSubrange[pattern]; IF start = NIL THEN GOTO BadPattern; pr.startDate ¬ Tempus.Parse[rope: start, search: TRUE ! Tempus.Unintelligible => GOTO BadPattern].time; [pr.endDate, pr.precision] ¬ Tempus.Parse[rope: end, search: TRUE ! Tempus.Unintelligible => GOTO BadPattern]; <> SELECT pr.precision FROM years => pr.endDate ¬ Tempus.Adjust[baseTime: pr.endDate, years: 1, seconds: -1].time; months => pr.endDate ¬ Tempus.Adjust[baseTime: pr.endDate, months: 1, seconds: -1].time; days => pr.endDate ¬ Tempus.Adjust[baseTime: pr.endDate, days: 1, seconds: -1].time; hours => pr.endDate ¬ Tempus.Adjust[baseTime: pr.endDate, hours: 1, seconds: -1].time; minutes => pr.endDate ¬ Tempus.Adjust[baseTime: pr.endDate, minutes: 1, seconds: -1].time; ENDCASE => NULL; IF BasicTime.Period[from: pr.startDate, to: pr.endDate] < 0 THEN GOTO BadPattern; }; pparsedNew ¬ pr; <> valueDate ¬ Tempus.Parse[rope: value, search: TRUE ! Tempus.Unintelligible => GOTO BadValue].time; match ¬ BasicTime.Period[from: pr.startDate, to: valueDate] >= 0 AND BasicTime.Period[from: valueDate, to: pr.endDate] >= 0; EXITS BadPattern => RETURN[match: FALSE, nothingGreater: TRUE, pparsedNew: NIL]; BadValue => RETURN[match: FALSE, nothingGreater: FALSE, pparsedNew: pparsedNew]; }; PDate: TYPE ~ REF PDateBody; PDateBody: TYPE ~ RECORD [ pattern: ROPE, patternDate: BasicTime.GMT, patternPrecision: Tempus.Precision ]; DateAndTime: PUBLIC PatternMatch.MatchProc ~ { <<[value: ROPE, pattern: ROPE, pparsed: REF _ NIL] RETURNS [match: BOOLEAN, nothingGreater: BOOLEAN _ FALSE, pparsedNew: REF _ NIL]>> <> valueDate: BasicTime.GMT; pr: PDate; WITH pparsed SELECT FROM ppr: PDate => pr ¬ ppr; ENDCASE => { -- pparsed is wrong type or NIL pr ¬ NEW[PDateBody]; pr.pattern ¬ pattern; [pr.patternDate, pr.patternPrecision] ¬ Tempus.Parse[rope: pattern, search: TRUE ! Tempus.Unintelligible => GOTO BadPattern]; }; pparsedNew ¬ pr; <> valueDate ¬ Tempus.Parse[rope: value, search: TRUE ! Tempus.Unintelligible => GOTO BadValue].time; valueDate ¬ Tempus.Adjust[baseTime: valueDate, precisionOfResult: pr.patternPrecision].time; match ¬ BasicTime.Period[from: pr.patternDate, to: valueDate] = 0; EXITS BadPattern => RETURN[match: FALSE, nothingGreater: TRUE, pparsedNew: NIL]; BadValue => RETURN[match: FALSE, nothingGreater: FALSE, pparsedNew: pparsedNew]; }; DWIM: PUBLIC PROC [pattern: ROPE] RETURNS [ptype: ROPE] ~ { <> IsInt: PROC [r: ROPE] RETURNS [BOOLEAN ¬ TRUE] ~ { [] ¬ Convert.IntFromRope[r ! Convert.Error => GOTO NotInt]; EXITS NotInt => RETURN[FALSE]; }; IsTime: PROC [r: ROPE] RETURNS [BOOLEAN ¬ TRUE] ~ { [] ¬ Tempus.Parse[rope: r, search: TRUE ! Tempus.Unintelligible => GOTO NotTime]; EXITS NotTime => RETURN[FALSE]; }; start, end: ROPE; [start, end] ¬ ParseSubrange[pattern]; IF NOT (start=NIL AND end=NIL) THEN ptype ¬ SELECT TRUE FROM -- determine what type of subrange IsTime[start] AND IsTime[end] => "daterange", IsInt[start] AND IsInt[end] => "numrange", ENDCASE => "subrange" ELSE ptype ¬ SELECT TRUE FROM -- not a subrange Rope.Find[pattern, "*"]#-1 => "wildcard", Rope.Length[pattern]>2 AND IsTime[pattern] => "date", ENDCASE => "prefix"; }; CheckPattern: PUBLIC PROC [pattern: ROPE, proc: PatternMatch.MatchProc] RETURNS [ok: BOOLEAN ¬ TRUE, info: ROPE ¬ NIL] ~ { <> pparsed: REF; IF proc=NIL THEN RETURN[TRUE, NIL]; pparsed ¬ proc[value: "", pattern: pattern].pparsedNew; ok ¬ pparsed#NIL; WITH pparsed SELECT FROM pdsub: PDateSub => info ¬ Rope.Cat[Tempus.MakeRope[time: pdsub.startDate, precision: pdsub.precision], "-", Tempus.MakeRope[time: pdsub.endDate, precision: seconds]]; pdate: PDate => info ¬ Tempus.MakeRope[time: pdate.patternDate, precision: pdate.patternPrecision]; ENDCASE => info ¬ NIL; }; <> <<>> matchTable: SymTab.Ref; Lookup: PUBLIC PROC [name: ROPE] RETURNS [proc: PatternMatch.MatchProc] ~ { <> pr: REF PatternMatch.MatchProc ¬ NARROW[SymTab.Fetch[matchTable, name].val]; RETURN[IF pr#NIL THEN pr­ ELSE NIL]; }; Register: PUBLIC PROC [name: ROPE, proc: PatternMatch.MatchProc] ~ { <> IF proc#NIL THEN [] ¬ SymTab.Store[matchTable, name, NEW[PatternMatch.MatchProc ¬ proc]] ELSE [] ¬ SymTab.Delete[matchTable, name]; }; Pairs: PUBLIC PROC [action: PatternMatch.EachPairAction] RETURNS [BOOL] ~ { <<... enumerates pairs currently in the pattern registry in unspecified order; applies action to each pair until action returns TRUE or no more pairs; returns TRUE if some action returns TRUE.>> Action: SymTab.EachPairAction = { <<[key: ROPE, val: SymTab.Val] RETURNS [quit: BOOL _ FALSE]>> pr: REF PatternMatch.MatchProc ¬ NARROW[val]; RETURN[action[key, pr­]]; }; RETURN[SymTab.Pairs[matchTable, Action]]; }; <<>> <> matchTable ¬ NARROW[Atom.GetProp[$PatternMatch, $matchTable]]; IF matchTable = NIL THEN { matchTable ¬ SymTab.Create[case: FALSE]; Atom.PutProp[$PatternMatch, $matchTable, matchTable]; }; <> Register["exact", Equal]; Register["prefix", Prefix]; Register["wildcard", Wildcard]; Register["re", RE]; Register["soundexact", SoundexMatch]; Register["soundex", SoundexPrefix]; Register["subrange", Subrange]; Register["numrange", NumSubrange]; Register["daterange", DateSubrange]; Register["date", DateAndTime]; END. <> <> <> <> <> <> <<>>