<> <> <> <> <> <<>> DIRECTORY Ascii USING [Lower, Upper, NUL, SP], Atom USING [GetPName, MakeAtom], BasicTime USING [GMT, Period], IO USING [BreakProc, EndOfStream, GetTokenRope, GetLineRope, TokenProc, GetChar, SkipWhitespace, PeekChar, GetRopeLiteral, Error, STREAM, PutRope, PutF1, RIS, rope], PatternMatch USING [CheckPattern, DWIM, Lookup, MatchProc], Rope USING [Cat, Concat, Equal, Fetch, FromChar, Index, Length, Replace, ROPE, Run, SkipTo, Substr, Translate, TranslatorType], Tempus USING [Adjust, Parse, Precision, MakeRope, Unintelligible], LoganBerry, LoganQueryClass, LoganQuery; LoganQueryImpl: CEDAR PROGRAM IMPORTS Ascii, Atom, BasicTime, IO, PatternMatch, Rope, Tempus, LoganBerry, LoganQueryClass EXPORTS LoganQuery ~ BEGIN OPEN LoganQuery; ROPE: TYPE ~ Rope.ROPE; STREAM: TYPE ~ IO.STREAM; <> <> NextEntry: PUBLIC PROC [cursor: Cursor, dir: CursorDirection ¬ increasing] RETURNS [entry: LoganBerry.Entry] = { <> <> IF cursor.class.retrieve = NIL THEN RETURN[NIL]; RETURN[cursor.class.retrieve[cursor, dir]]; }; EndGenerate: PUBLIC PROC [cursor: Cursor] RETURNS [] = { <> IF cursor.class.destroy = NIL THEN RETURN; cursor.class.destroy[cursor]; }; <> <> QueryEntries: PUBLIC PROC [db: LoganBerry.OpenDB, patterns: AttributePatterns, baseIndex: LoganBerry.AttributeType ¬ NIL, defaultPattern: PatternType ¬ "DWIM", planOnly: BOOLEAN ¬ FALSE] RETURNS [cursor: Cursor, plan: QueryPlan] ~ { <> dbInfo: LoganBerry.SchemaInfo ¬ LoganBerry.Describe[db: db]; base: SubPlan ¬ NIL; <
> FOR p: AttributePatterns ¬ patterns, p.rest WHILE p # NIL DO possibleBase: BOOLEAN = baseIndex=NIL OR baseIndex=p.first.attr.type; new: SubPlan ¬ AnalyzeSubPlan[p.first, dbInfo, possibleBase, defaultPattern]; SELECT TRUE FROM NOT possibleBase, new.itype = $notAnIndex => plan ¬ CONS[new, plan]; baseIndex = p.first.attr.type, base = NIL => base ¬ new; new.cost <= base.cost => { -- new best base plan ¬ CONS[base, plan]; base ¬ new; }; ENDCASE => plan ¬ CONS[new, plan]; -- new plan not best ENDLOOP; IF base = NIL THEN { -- no attribute pattern for base IF baseIndex = NIL THEN baseIndex ¬ dbInfo.indices.first.key; -- pick arbitrary base base ¬ AnalyzeSubPlan[NEW[AttributePatternRec ¬ [attr: [baseIndex, NIL], ptype: NIL]], dbInfo, TRUE, defaultPattern]; }; IF base.itype = $notAnIndex THEN ERROR LoganBerry.Error[$NoIndex, "No suitable base index"]; plan ¬ CONS[base, plan]; IF planOnly THEN RETURN; <> cursor ¬ LoganQueryClass.GenerateEntries[db: db, key: base.attr.type, start: base.start, end: base.end]; <> FOR p: QueryPlan ¬ plan, p.rest WHILE p # NIL DO IF NOT Rope.Equal[p.first.attr.value, ""] AND p.first.filter#NIL THEN cursor ¬ LoganQueryClass.FilterEntries[input: cursor, pattern: p.first.attr.value, filter: p.first.filter, atype: p.first.attr.type, stopIfNothingGreater: FALSE]; ENDLOOP; cursor ¬ LoganQueryClass.EnableAborts[input: cursor]; }; AnalyzeSubPlan: PROC [p: AttributePattern, dbInfo: LoganBerry.SchemaInfo, computeCost: BOOLEAN ¬ TRUE, defaultPattern: PatternType] RETURNS [plan: SubPlan] ~ { okPattern: BOOLEAN; plan ¬ NEW[SubPlanRec ¬ [attr: p.attr, ptype: p.ptype]]; IF plan.ptype=NIL THEN plan.ptype ¬ defaultPattern; IF Rope.Equal[plan.ptype, "DWIM", FALSE] THEN { plan.wasDWIM ¬ TRUE; plan.ptype ¬ PatternMatch.DWIM[plan.attr.value]; }; plan.filter ¬ PatternMatch.Lookup[plan.ptype]; IF plan.filter=NIL THEN plan.errMsg ¬ "INVALID PATTERN MATCHING"; [okPattern, plan.infoMsg] ¬ PatternMatch.CheckPattern[plan.attr.value, plan.filter]; IF NOT okPattern THEN plan.errMsg ¬ "BAD PATTERN"; IF plan.wasDWIM THEN plan.infoMsg ¬ Rope.Cat["using ", plan.ptype, "...", plan.infoMsg]; IF NOT computeCost THEN RETURN; FOR i: LIST OF LoganBerry.IndexInfo ¬ dbInfo.indices, i.rest WHILE i#NIL DO IF i.first.key = plan.attr.type THEN { plan.itype ¬ i.first.order; EXIT; }; ENDLOOP; IF plan.itype = $notAnIndex THEN RETURN; [plan.start, plan.end] ¬ BaseStartEnd[plan.attr.value, plan.ptype, plan.itype]; plan.cost ¬ QueryCost[plan.start, plan.end, plan.itype]; }; QueryCost: PROC [start, end: ROPE, itype: ATOM ¬ $lex] RETURNS [est: REAL ¬ 1.0] ~ { <> N: NAT = 5; run: INT; sChar, eChar: CHAR; IF end=NIL THEN RETURN[1.0]; IF start=NIL THEN start ¬ " "; SELECT itype FROM $lex, $ascii => { run ¬ Rope.Run[s1: start, s2: end, case: itype=$ascii]; IF run > N THEN RETURN[0.0]; -- the query has negligible cost est ¬ 1.0; THROUGH [1..run] DO est ¬ est*0.01; ENDLOOP; -- est ¬ 1/100run sChar ¬ IF Rope.Length[start] <= run THEN ' ELSE Rope.Fetch[start, run]; eChar ¬ IF Rope.Length[end] <= run THEN ' ELSE Rope.Fetch[end, run]; IF itype=$lex THEN { sChar ¬ Ascii.Upper[sChar]; eChar ¬ Ascii.Upper[eChar]; }; est ¬ est*MIN[eChar-sChar,100]*0.01; }; $gmt => { startDate, endDate: BasicTime.GMT; year: REAL ¬ 31536000.0; startDate ¬ Tempus.Parse[rope: start, search: TRUE ! Tempus.Unintelligible => GOTO BadPattern].time; endDate ¬ Tempus.Parse[rope: end, search: TRUE ! Tempus.Unintelligible => GOTO BadPattern].time; est ¬ BasicTime.Period[from: startDate, to: endDate]/year; -- assume one year is complete database IF est < 0 OR est > 1.0 THEN est ¬ 1.0; EXITS BadPattern => RETURN[1.0]; }; ENDCASE => RETURN[0.5]; -- don't know the index layout so assume half the index will be needed on average }; BaseStartEnd: PROC [pattern: ROPE, ptype: ROPE, itype: ATOM ¬ $lex] RETURNS [start, end: ROPE] ~ { <> ENABLE SyntaxError => {start ¬ end ¬ NIL; CONTINUE;}; -- raised by ParseSubrange Bump: PROC [old: ROPE] RETURNS [new: ROPE] ~ INLINE { last: INT = Rope.Length[old] - 1; new ¬ IF last >= 0 THEN Rope.Replace[base: old, start: last, len: 1, with: Rope.FromChar[SUCC[Ascii.Lower[Rope.Fetch[old, last]]]]] ELSE NIL; }; Upper: Rope.TranslatorType = { <<[old: CHAR] RETURNS [new: CHAR] -- old Cedar7.0 impl>> <<[old: CHAR] RETURNS [CHAR]>> new: CHAR ¬ Ascii.Upper[old]; RETURN[new]; }; Lower: Rope.TranslatorType = { <<[old: CHAR] RETURNS [new: CHAR] --old Cedar7.0 impl>> <<[old: CHAR] RETURNS [CHAR]>> new: CHAR ¬ Ascii.Lower[old]; RETURN[new]; }; SELECT itype FROM $lex => { SELECT TRUE FROM Rope.Equal[ptype, "exact", FALSE] => { start ¬ pattern; end ¬ pattern; }; Rope.Equal[ptype, "prefix", FALSE] => { start ¬ pattern; end ¬ Bump[start]; }; Rope.Equal[ptype, "wildcard", FALSE] => { start ¬ Rope.Substr[base: pattern, len: Rope.Index[s1: pattern, s2: "*"]]; end ¬ Bump[start]; }; Rope.Equal[ptype, "re", FALSE] => { i: INT ¬ Rope.SkipTo[s: pattern, skip: "\'#[^$*+(\\<{!"]; -- look for special chars IF NOT i = Rope.Length[pattern] AND Rope.Fetch[pattern, i] = '* THEN -- could be zero of previous char i ¬ i-1; start ¬ Rope.Substr[base: pattern, len: i]; end ¬ Bump[start]; }; Rope.Equal[ptype, "soundex", FALSE] => { start ¬ Rope.FromChar[Rope.Fetch[base: pattern, index: 0]]; end ¬ Bump[start]; }; Rope.Equal[ptype, "subrange", FALSE] => { [start, end] ¬ ParseSubrange[pattern]; }; Rope.Equal[ptype, "numrange", FALSE] => { <> s, e: ROPE; [s, e] ¬ ParseSubrange[pattern ! SyntaxError => {start ¬ NIL; CONTINUE}]; IF Rope.Length[s] = Rope.Length[e] THEN { start ¬ Rope.Substr[base: s, len: Rope.Run[s1: s, s2: e]]; end ¬ Bump[start]; } ELSE start ¬ end ¬ NIL; -- can do nothing intelligent, e.g. 300-3000 }; Rope.Equal[ptype, "daterange", FALSE] => { start ¬ end ¬ NIL; -- can do nothing intelligent }; Rope.Equal[ptype, "date", FALSE] => { start ¬ end ¬ NIL; -- can do nothing intelligent }; ENDCASE => start ¬ end ¬ NIL; }; $ascii => { start ¬ BaseStartEnd[Rope.Translate[base: pattern, translator: Upper], ptype, $lex].start; end ¬ BaseStartEnd[Rope.Translate[base: pattern, translator: Lower], ptype, $lex].end; }; $gmt => { SELECT TRUE FROM Rope.Equal[ptype, "date", FALSE] => { patternDate: BasicTime.GMT; patternPrecision: Tempus.Precision; [patternDate, patternPrecision] ¬ Tempus.Parse[rope: pattern, search: TRUE ! Tempus.Unintelligible => GOTO BadPattern]; start ¬ Tempus.MakeRope[time: patternDate, precision: patternPrecision]; [patternDate, patternPrecision] ¬ SELECT patternPrecision FROM years => Tempus.Adjust[years: 1, baseTime: patternDate], months => Tempus.Adjust[months: 1, baseTime: patternDate], days => Tempus.Adjust[days: 1, baseTime: patternDate], hours => Tempus.Adjust[hours: 1, baseTime: patternDate], minutes => Tempus.Adjust[minutes: 1, baseTime: patternDate], seconds => Tempus.Adjust[seconds: 1, baseTime: patternDate], ENDCASE => Tempus.Adjust[seconds: 1, baseTime: patternDate]; end ¬ Tempus.MakeRope[time: patternDate, precision: patternPrecision]; EXITS BadPattern => start ¬ end ¬ NIL; }; Rope.Equal[ptype, "daterange", FALSE] => { patternDate: BasicTime.GMT; patternPrecision: Tempus.Precision; [start, end] ¬ ParseSubrange[pattern]; [patternDate, patternPrecision] ¬ Tempus.Parse[rope: start, search: TRUE ! Tempus.Unintelligible => GOTO BadPattern]; start ¬ Tempus.MakeRope[time: patternDate, precision: patternPrecision]; [patternDate, patternPrecision] ¬ Tempus.Parse[rope: end, search: TRUE ! Tempus.Unintelligible => GOTO BadPattern]; patternDate ¬ SELECT patternPrecision FROM years => Tempus.Adjust[baseTime: patternDate, years: 1, seconds: -1].time, months => Tempus.Adjust[baseTime: patternDate, months: 1, seconds: -1].time, days => Tempus.Adjust[baseTime: patternDate, days: 1, seconds: -1].time, hours => Tempus.Adjust[baseTime: patternDate, hours: 1, seconds: -1].time, minutes => Tempus.Adjust[baseTime: patternDate, minutes: 1, seconds: -1].time, ENDCASE => Tempus.Adjust[baseTime: patternDate, seconds: 1].time; end ¬ Tempus.MakeRope[time: patternDate, precision: seconds]; EXITS BadPattern => start ¬ end ¬ NIL; }; ENDCASE => start ¬ end ¬ NIL; }; $int => { SELECT TRUE FROM Rope.Equal[ptype, "exact", FALSE] => start ¬ end ¬ pattern; Rope.Equal[ptype, "intrange", FALSE] => [start, end] ¬ ParseSubrange[pattern]; ENDCASE => start ¬ end ¬ NIL; }; ENDCASE => start ¬ end ¬ NIL; -- search complete database }; <> SyntaxError: PUBLIC ERROR [explanation: ROPE ¬ NIL] = CODE; ReadAttributePattern: PROC [s: STREAM, prefetch: ROPE ¬ NIL] RETURNS [a: AttributePattern] ~ { <> ENABLE IO.EndOfStream => ERROR SyntaxError["unexpected end of input"]; ch: CHAR; token: ROPE ¬ prefetch; IF token = NIL THEN token ¬ IO.GetTokenRope[s, IO.TokenProc ! IO.EndOfStream => GOTO none].token; a ¬ NEW[AttributePatternRec]; a.attr.type ¬ Atom.MakeAtom[token]; ch ¬ IO.GetChar[s]; -- attribute separation char or "(" IF ch = '( THEN { -- read pattern type a.ptype ¬ IO.GetTokenRope[s, IO.TokenProc].token; ch ¬ IO.GetChar[s]; IF ch # ') THEN ERROR SyntaxError[Rope.Concat["unmatched ()'s: last read ", a.ptype]]; ch ¬ IO.GetChar[s]; }; IF ch # ': THEN ERROR SyntaxError[Rope.Concat["missing colon: last read ", a.ptype]]; [] ¬ IO.SkipWhitespace[stream: s, flushComments: FALSE]; IF IO.PeekChar[s] = '" THEN { a.attr.value ¬ IO.GetRopeLiteral[s ! IO.Error => ERROR SyntaxError["Malformed rope literal"]]; } ELSE a.attr.value ¬ IO.GetTokenRope[s, TokenProc].token; EXITS none => RETURN[NIL]; }; ReadAttributePatterns: PUBLIC PROC [s: IO.STREAM] RETURNS [ap: AttributePatterns] ~ { <> a: AttributePattern; endOfAp: AttributePatterns ¬ NIL; a ¬ ReadAttributePattern[s]; WHILE a # NIL DO IF endOfAp = NIL THEN ap ¬ endOfAp ¬ LIST[a] ELSE { endOfAp.rest ¬ LIST[a]; endOfAp ¬ endOfAp.rest; }; a ¬ ReadAttributePattern[s]; ENDLOOP; }; WriteAttributePattern: PROC [s: IO.STREAM, a: AttributePattern] RETURNS [] ~ { IO.PutRope[s, Atom.GetPName[a.attr.type]]; IF a.ptype # NIL THEN IO.PutF1[s, "(%g)", IO.rope[a.ptype]]; IO.PutF1[s, ": ""%g"" ", IO.rope[a.attr.value]]; }; WriteAttributePatterns: PUBLIC PROC [s: IO.STREAM, ap: AttributePatterns] RETURNS [] ~ { FOR p: AttributePatterns ¬ ap, p.rest WHILE p # NIL DO WriteAttributePattern[s, p.first]; ENDLOOP; }; PatternsToEntry: PUBLIC PROC [ap: AttributePatterns] RETURNS [entry: LoganBerry.Entry] ~ { endOfEntry: LoganBerry.Entry ¬ NIL; FOR p: AttributePatterns ¬ ap, p.rest WHILE p # NIL DO IF endOfEntry = NIL THEN entry ¬ endOfEntry ¬ LIST[p.first.attr] ELSE { endOfEntry.rest ¬ LIST[p.first.attr]; endOfEntry ¬ endOfEntry.rest; }; ENDLOOP; }; EntryToPatterns: PUBLIC PROC [entry: LoganBerry.Entry] RETURNS [ap: AttributePatterns] ~ { pattern: AttributePattern; endOfPattern: AttributePatterns ¬ NIL; FOR e: LoganBerry.Entry ¬ entry, e.rest WHILE e # NIL DO pattern ¬ NEW[AttributePatternRec ¬ [attr: e.first]]; IF endOfPattern = NIL THEN ap ¬ endOfPattern ¬ LIST[pattern] ELSE { endOfPattern.rest ¬ LIST[pattern]; endOfPattern ¬ endOfPattern.rest; }; ENDLOOP; }; <> <> <<>> BooleanFilterEntries: PUBLIC PROC [input: Cursor, query: ParseTree, inputOrder: LoganBerry.AttributeType, primaryKey: LoganBerry.AttributeType, defaultPattern: PatternType ¬ "DWIM"] RETURNS [output: Cursor] ~ { <> <> output ¬ CursorForSubtree[input, query, inputOrder, primaryKey, defaultPattern]; output ¬ LoganQueryClass.EnableAborts[input: output]; }; CursorForSubtree: PROC [input: Cursor, node: ParseNode, order, key: LoganBerry.AttributeType, dp: PatternType] RETURNS [output: Cursor] ~ { <> LookupFilter: PROC [ap: AttributePattern, default: PatternType] RETURNS [proc: PatternMatch.MatchProc] ~ { IF ap.ptype=NIL THEN ap.ptype ¬ default; IF Rope.Equal[ap.ptype, "DWIM", FALSE] THEN ap.ptype ¬ PatternMatch.DWIM[ap.attr.value]; proc ¬ PatternMatch.Lookup[ap.ptype]; }; SELECT node.tag FROM $and => { <> output1: Cursor ¬ CursorForSubtree[input, node.child1, order, key, dp]; output ¬ CursorForSubtree[output1, node.child2, order, key, dp]; }; $or => { <> input1, input2, output1, output2: Cursor; [input1, input2] ¬ LoganQueryClass.DuplicateEntries[input]; output1 ¬ CursorForSubtree[input1, node.child1, order, key, dp]; output2 ¬ CursorForSubtree[input2, node.child2, order, key, dp]; output ¬ LoganQueryClass.MergeEntries[output1, output2, order]; IF order = key THEN output ¬ LoganQueryClass.UnDuplicateEntries[output, order]; <> }; $not => { <> input1, fullinput, output1: Cursor; IF node.child1.tag = $ap THEN { -- simple case output ¬ LoganQueryClass.AntiFilterEntries[input: input, pattern: node.child1.ap.attr.value, filter: LookupFilter[node.child1.ap, dp], atype: node.child1.ap.attr.type]; RETURN [output]; }; [input1, fullinput] ¬ LoganQueryClass.DuplicateEntries[input]; output1 ¬ CursorForSubtree[input1, node.child1, order, key, dp]; output ¬ LoganQueryClass.SubtractEntries[fullinput, output1, key, order]; }; $ap => { <> output ¬ LoganQueryClass.FilterEntries[input: input, pattern: node.ap.attr.value, filter: LookupFilter[node.ap, dp], atype: node.ap.attr.type]; }; ENDCASE; }; <<>> <> <> <<>> <> <> <> <> <> <<>> <> <<>> TokenStream: TYPE = REF TokenStreamRec; TokenStreamRec: TYPE = RECORD [ stream: STREAM, prefetch: ROPE _ NIL ]; ParseBooleanQuery: PUBLIC PROC [s: IO.STREAM] RETURNS [tree: ParseTree] ~ { <> ts: TokenStream ¬ NEW[TokenStreamRec ¬ [s]]; tree ¬ GetQuery[ts]; }; <<>> GetQuery: PROC [ts: TokenStream] RETURNS [tree: ParseTree] ~ { <> subTree: ParseTree ¬ GetBoolexpr[ts]; IF PeekToken[ts] # NIL THEN { tree ¬ NEW[ParseNodeRec ¬ [tag: $and]]; tree.child1 ¬ subTree; tree.child2 ¬ GetQuery[ts]; } ELSE { tree ¬ subTree; }; }; GetBoolexpr: PROC [ts: TokenStream] RETURNS [tree: ParseTree] ~ { <> subTree: ParseTree ¬ GetTerm[ts]; IF Rope.Equal[PeekToken[ts], "OR", FALSE] THEN { [] ¬ NextToken[ts]; tree ¬ NEW[ParseNodeRec ¬ [tag: $or]]; tree.child1 ¬ subTree; tree.child2 ¬ GetBoolexpr[ts]; } ELSE { tree ¬ subTree; }; }; GetTerm: PROC [ts: TokenStream] RETURNS [tree: ParseTree] ~ { <> subTree: ParseTree ¬ GetFactor[ts]; IF Rope.Equal[PeekToken[ts], "AND", FALSE] THEN { [] ¬ NextToken[ts]; tree ¬ NEW[ParseNodeRec ¬ [tag: $and]]; tree.child1 ¬ subTree; tree.child2 ¬ GetTerm[ts]; } ELSE { tree ¬ subTree; }; }; GetFactor: PROC [ts: TokenStream] RETURNS [tree: ParseTree] ~ { <> token: ROPE ¬ PeekToken[ts]; SELECT TRUE FROM Rope.Equal[token, "NOT", FALSE] => { token ¬ NextToken[ts]; tree ¬ NEW[ParseNodeRec ¬ [tag: $not]]; tree.child1 ¬ GetFactor[ts]; }; Rope.Equal[token, "("] => { token ¬ NextToken[ts]; tree ¬ GetBoolexpr[ts]; token ¬ NextToken[ts]; IF NOT Rope.Equal[token, ")"] THEN ERROR SyntaxError["Unmatched parenthesis"]; }; ENDCASE => { -- attribute-pattern tree ¬ GetAttrributePattern[ts]; }; }; GetAttrributePattern: PROC [ts: TokenStream] RETURNS [tree: ParseTree] ~ { <> tree ¬ NEW[ParseNodeRec ¬ [tag: $ap]]; tree.ap ¬ ReadAttributePattern[ts.stream, ts.prefetch]; IF tree.ap = NIL THEN ERROR SyntaxError["Unexpected end of input"]; ts.prefetch ¬ NIL; }; TokenProc: IO.BreakProc ~ { RETURN[SELECT char FROM IN [Ascii.NUL .. Ascii.SP] => sepr, '(, '), ': => break, ENDCASE => other]; }; NextToken: PROC [ts: TokenStream] RETURNS [token: ROPE] ~ { IF ts.prefetch # NIL THEN token ¬ ts.prefetch ELSE token ¬ IO.GetTokenRope[ts.stream, TokenProc ! IO.EndOfStream => {token ¬ NIL; CONTINUE}].token; ts.prefetch ¬ NIL; }; PeekToken: PROC [ts: TokenStream] RETURNS [token: ROPE] ~ { IF ts.prefetch = NIL THEN ts.prefetch ¬ IO.GetTokenRope[ts.stream, TokenProc ! IO.EndOfStream => CONTINUE].token; token ¬ ts.prefetch; }; <> AbortQuery: PUBLIC PROC [cursor: Cursor] ~ { <> LoganQueryClass.AbortQuery[cursor]; }; <<>> <> ParseSubrange: PROC [r: ROPE] RETURNS [start, end: ROPE ¬ NIL] ~ { <> ENABLE IO.EndOfStream, IO.Error => GOTO Bad; ToDash: IO.BreakProc = { <<[char: CHAR] RETURNS [IO.CharClass]>> RETURN[SELECT char FROM '- => sepr, ENDCASE => other]; }; s: IO.STREAM ¬ IO.RIS[r]; start ¬ IF IO.PeekChar[s] = '" THEN IO.GetRopeLiteral[s] ELSE IO.GetTokenRope[s, ToDash].token; IF IO.GetChar[s] # '- THEN GOTO Bad; end ¬ IF IO.PeekChar[s] = '" THEN IO.GetRopeLiteral[s] ELSE IO.GetLineRope[s]; EXITS Bad => ERROR SyntaxError["Not a subrange"]; }; END.