[_CDCSL_93-16_]<1>Cedar>release>LoganQuery>LoganQueryImpl.mesa

LoganQueryImpl.mesa

Doug Terry, July 21, 1992 10:07 am PDT

Willie-s, April 27, 1992 11:40 am PDT

LoganQuery allows more complicated queries to be performed on a LoganBerry database than the simple retrievals provided by the LoganBerry interface. This module implements the complex queries, i.e. those that involve composing the basic LoganQuery classes. These include multi-attribute queries and boolean queries. See LoganQueryClassImpl.mesa for the implementation of the basic building blocks (LoganQuery classes) used by this module.

DIRECTORY

Ascii USING [Lower, Upper, NUL, SP],

Atom USING [GetPName, MakeAtom],

BasicTime USING [GMT, Period],

IO USING [BreakProc, EndOfStream, GetTokenRope, GetLineRope, TokenProc, GetChar, SkipWhitespace, PeekChar, GetRopeLiteral, Error, STREAM, PutRope, PutF1, RIS, rope],

PatternMatch USING [CheckPattern, DWIM, Lookup, MatchProc],

Rope USING [Cat, Concat, Equal, Fetch, FromChar, Index, Length, Replace, ROPE, Run, SkipTo, Substr, Translate, TranslatorType],

Tempus USING [Adjust, Parse, Precision, MakeRope, Unintelligible],

LoganBerry,

LoganQueryClass,

LoganQuery;

LoganQueryImpl: CEDAR PROGRAM

IMPORTS Ascii, Atom, BasicTime, IO, PatternMatch, Rope, Tempus, LoganBerry, LoganQueryClass

EXPORTS LoganQuery

~ BEGIN OPEN LoganQuery;

ROPE: TYPE ~ Rope.ROPE;

STREAM: TYPE ~ IO.STREAM;

Basic operations

The following operations are syntactically and semantically similar to those provided by LoganBerry. Any errors raised by these operations are generated by LoganBerry.

NextEntry: PUBLIC PROC [cursor: Cursor, dir: CursorDirection ¬ increasing] RETURNS [entry: LoganBerry.Entry] = {

Retrieves the next entry relative to the given cursor. The cursor is automatically updated so that NextEntry may be repeatedly called to enumerate entries. NIL is returned if the cursor is at the end of the sequence and dir=increasing or at the start of the sequence and dir=decreasing.

Errors: IndexNotOpen, BadIndex, LogNotOpen, BadLogEntry

IF cursor.class.retrieve = NIL THEN RETURN[NIL];

RETURN[cursor.class.retrieve[cursor, dir]];

};

EndGenerate: PUBLIC PROC [cursor: Cursor] RETURNS [] = {

Releases the cursor; no further operations may be performed using the given cursor. This must be called once for every cursor created.

IF cursor.class.destroy = NIL THEN RETURN;

cursor.class.destroy[cursor];

};

Query planning and execution

In actuality, there is no $Query class. The cursor returned by QueryEntries is of class $Filter; it is obtained by one or more calls to FilterEntries.

QueryEntries: PUBLIC PROC [db: LoganBerry.OpenDB, patterns: AttributePatterns, baseIndex: LoganBerry.AttributeType ¬ NIL, defaultPattern: PatternType ¬ "DWIM", planOnly: BOOLEAN ¬ FALSE] RETURNS [cursor: Cursor, plan: QueryPlan] ~ {

Returns a cursor; retrievals using this new cursor match the attribute patterns against entries in the database. The baseIndex parameter governs which attribute type is used as the base index for the query; for baseIndex=NIL, an index is chosen that attempts to optimize the execution of the query (use this unless you really care about the order in which database entries are retrieved). The returned query plan provides information about the query. If planOnly is TRUE, then a query plan is formulated and returned, but a cursor for this query is not created.

dbInfo: LoganBerry.SchemaInfo ¬ LoganBerry.Describe[db: db];

base: SubPlan ¬ NIL;

figure out where to start and end in database to save work, and pick a "good" base index if necessary (the index that minimizes expected cost)

FOR p: AttributePatterns ¬ patterns, p.rest WHILE p # NIL DO

possibleBase: BOOLEAN = baseIndex=NIL OR baseIndex=p.first.attr.type;

new: SubPlan ¬ AnalyzeSubPlan[p.first, dbInfo, possibleBase, defaultPattern];

SELECT TRUE FROM

NOT possibleBase, new.itype = $notAnIndex => plan ¬ CONS[new, plan];

baseIndex = p.first.attr.type, base = NIL => base ¬ new;

new.cost <= base.cost => { -- new best base

plan ¬ CONS[base, plan];

base ¬ new;

};

ENDCASE => plan ¬ CONS[new, plan]; -- new plan not best

ENDLOOP;

IF base = NIL THEN { -- no attribute pattern for base

IF baseIndex = NIL THEN baseIndex ¬ dbInfo.indices.first.key; -- pick arbitrary base

base ¬ AnalyzeSubPlan[NEW[AttributePatternRec ¬ [attr: [baseIndex, NIL], ptype: NIL]], dbInfo, TRUE, defaultPattern];

};

IF base.itype = $notAnIndex THEN

ERROR LoganBerry.Error[$NoIndex, "No suitable base index"];

plan ¬ CONS[base, plan];

IF planOnly THEN RETURN;

simple base query

cursor ¬ LoganQueryClass.GenerateEntries[db: db, key: base.attr.type, start: base.start, end: base.end];

build up filters

FOR p: QueryPlan ¬ plan, p.rest WHILE p # NIL DO

IF NOT Rope.Equal[p.first.attr.value, ""] AND p.first.filter#NIL THEN

cursor ¬ LoganQueryClass.FilterEntries[input: cursor, pattern: p.first.attr.value, filter: p.first.filter, atype: p.first.attr.type, stopIfNothingGreater: FALSE];

ENDLOOP;

cursor ¬ LoganQueryClass.EnableAborts[input: cursor];

};

AnalyzeSubPlan: PROC [p: AttributePattern, dbInfo: LoganBerry.SchemaInfo, computeCost: BOOLEAN ¬ TRUE, defaultPattern: PatternType] RETURNS [plan: SubPlan] ~ {

okPattern: BOOLEAN;

plan ¬ NEW[SubPlanRec ¬ [attr: p.attr, ptype: p.ptype]];

IF plan.ptype=NIL THEN plan.ptype ¬ defaultPattern;

IF Rope.Equal[plan.ptype, "DWIM", FALSE] THEN {

plan.wasDWIM ¬ TRUE;

plan.ptype ¬ PatternMatch.DWIM[plan.attr.value];

};

plan.filter ¬ PatternMatch.Lookup[plan.ptype];

IF plan.filter=NIL THEN

plan.errMsg ¬ "INVALID PATTERN MATCHING";

[okPattern, plan.infoMsg] ¬ PatternMatch.CheckPattern[plan.attr.value, plan.filter];

IF NOT okPattern THEN

plan.errMsg ¬ "BAD PATTERN";

IF plan.wasDWIM THEN

plan.infoMsg ¬ Rope.Cat["using ", plan.ptype, "...", plan.infoMsg];

IF NOT computeCost THEN RETURN;

FOR i: LIST OF LoganBerry.IndexInfo ¬ dbInfo.indices, i.rest WHILE i#NIL DO

IF i.first.key = plan.attr.type THEN {

plan.itype ¬ i.first.order;

EXIT;

};

ENDLOOP;

IF plan.itype = $notAnIndex THEN RETURN;

[plan.start, plan.end] ¬ BaseStartEnd[plan.attr.value, plan.ptype, plan.itype];

plan.cost ¬ QueryCost[plan.start, plan.end, plan.itype];

};

QueryCost: PROC [start, end: ROPE, itype: ATOM ¬ $lex] RETURNS [est: REAL ¬ 1.0] ~ {

Estimates the cost of enumerating an index of the given type from start to end. The cost model is quite simple: it assumes that the probability of any given character being the nth character of a string is 0.01 for n in [1..N]. The estimate returned is the estimated fraction of the index that must be traversed. For instance, start="a" and end="b" for itype=$lex yields est=0.01, while start="aaa" and end="aac" for itype=$lex yields est=0.000002.

N: NAT = 5;

run: INT;

sChar, eChar: CHAR;

IF end=NIL THEN RETURN[1.0];

IF start=NIL THEN start ¬ " ";

SELECT itype FROM

$lex, $ascii => {

run ¬ Rope.Run[s1: start, s2: end, case: itype=$ascii];

IF run > N THEN RETURN[0.0]; -- the query has negligible cost

est ¬ 1.0;

THROUGH [1..run] DO est ¬ est*0.01; ENDLOOP; -- est ¬ 1/100run

sChar ¬ IF Rope.Length[start] <= run THEN ' ELSE Rope.Fetch[start, run];

eChar ¬ IF Rope.Length[end] <= run THEN ' ELSE Rope.Fetch[end, run];

IF itype=$lex THEN {

sChar ¬ Ascii.Upper[sChar];

eChar ¬ Ascii.Upper[eChar];

};

est ¬ est*MIN[eChar-sChar,100]*0.01;

};

$gmt => {

startDate, endDate: BasicTime.GMT;

year: REAL ¬ 31536000.0;

startDate ¬ Tempus.Parse[rope: start, search: TRUE ! Tempus.Unintelligible => GOTO BadPattern].time;

endDate ¬ Tempus.Parse[rope: end, search: TRUE ! Tempus.Unintelligible => GOTO BadPattern].time;

est ¬ BasicTime.Period[from: startDate, to: endDate]/year; -- assume one year is complete database

IF est < 0 OR est > 1.0 THEN est ¬ 1.0;

EXITS

BadPattern => RETURN[1.0];

};

ENDCASE => RETURN[0.5]; -- don't know the index layout so assume half the index will be needed on average

};

BaseStartEnd: PROC [pattern: ROPE, ptype: ROPE, itype: ATOM ¬ $lex] RETURNS [start, end: ROPE] ~ {

This routine exists solely for performance reasons. The intent is to identify a range [start..end] of entries in an index that includes all possible values that match the given pattern; clearly this range depends on the ordering of entries in the index ($lex, $ascii, $gmt, etc.). It is always acceptable to return [NIL, NIL], which indicates that the whole index must be enumerated; returning a range that is too small or incorrect, on the other hand, leads to incomplete answers to a filtered query.

ENABLE SyntaxError => {start ¬ end ¬ NIL; CONTINUE;}; -- raised by ParseSubrange

Bump: PROC [old: ROPE] RETURNS [new: ROPE] ~ INLINE {

last: INT = Rope.Length[old] - 1;

new ¬ IF last >= 0 THEN Rope.Replace[base: old, start: last, len: 1, with: Rope.FromChar[SUCC[Ascii.Lower[Rope.Fetch[old, last]]]]] ELSE NIL;

};

Upper: Rope.TranslatorType = {

[old: CHAR] RETURNS [new: CHAR] -- old Cedar7.0 impl

[old: CHAR] RETURNS [CHAR]

new: CHAR ¬ Ascii.Upper[old];

RETURN[new];

};

Lower: Rope.TranslatorType = {

[old: CHAR] RETURNS [new: CHAR] --old Cedar7.0 impl

[old: CHAR] RETURNS [CHAR]

new: CHAR ¬ Ascii.Lower[old];

RETURN[new];

};

SELECT itype FROM

$lex => {

SELECT TRUE FROM

Rope.Equal[ptype, "exact", FALSE] => {

start ¬ pattern;

end ¬ pattern;

};

Rope.Equal[ptype, "prefix", FALSE] => {

start ¬ pattern;

end ¬ Bump[start];

};

Rope.Equal[ptype, "wildcard", FALSE] => {

start ¬ Rope.Substr[base: pattern, len: Rope.Index[s1: pattern, s2: "*"]];

end ¬ Bump[start];

};

Rope.Equal[ptype, "re", FALSE] => {

i: INT ¬ Rope.SkipTo[s: pattern, skip: "\'#[^$*+(\\<{!"]; -- look for special chars

IF NOT i = Rope.Length[pattern] AND Rope.Fetch[pattern, i] = '* THEN -- could be zero of previous char

i ¬ i-1;

start ¬ Rope.Substr[base: pattern, len: i];

end ¬ Bump[start];

};

Rope.Equal[ptype, "soundex", FALSE] => {

start ¬ Rope.FromChar[Rope.Fetch[base: pattern, index: 0]];

end ¬ Bump[start];

};

Rope.Equal[ptype, "subrange", FALSE] => {

[start, end] ¬ ParseSubrange[pattern];

};

Rope.Equal[ptype, "numrange", FALSE] => {

find greatest common prefix for start and end

s, e: ROPE;

[s, e] ¬ ParseSubrange[pattern ! SyntaxError => {start ¬ NIL; CONTINUE}];

IF Rope.Length[s] = Rope.Length[e]

THEN {

start ¬ Rope.Substr[base: s, len: Rope.Run[s1: s, s2: e]];

end ¬ Bump[start];

}

ELSE start ¬ end ¬ NIL; -- can do nothing intelligent, e.g. 300-3000

};

Rope.Equal[ptype, "daterange", FALSE] => {

start ¬ end ¬ NIL; -- can do nothing intelligent

};

Rope.Equal[ptype, "date", FALSE] => {

start ¬ end ¬ NIL; -- can do nothing intelligent

};

ENDCASE => start ¬ end ¬ NIL;

};

$ascii => {

start ¬ BaseStartEnd[Rope.Translate[base: pattern, translator: Upper], ptype, $lex].start;

end ¬ BaseStartEnd[Rope.Translate[base: pattern, translator: Lower], ptype, $lex].end;

};

$gmt => {

SELECT TRUE FROM

Rope.Equal[ptype, "date", FALSE] => {

patternDate: BasicTime.GMT;

patternPrecision: Tempus.Precision;

[patternDate, patternPrecision] ¬ Tempus.Parse[rope: pattern, search: TRUE ! Tempus.Unintelligible => GOTO BadPattern];

start ¬ Tempus.MakeRope[time: patternDate, precision: patternPrecision];

[patternDate, patternPrecision] ¬ SELECT patternPrecision FROM

years => Tempus.Adjust[years: 1, baseTime: patternDate],

months => Tempus.Adjust[months: 1, baseTime: patternDate],

days => Tempus.Adjust[days: 1, baseTime: patternDate],

hours => Tempus.Adjust[hours: 1, baseTime: patternDate],

minutes => Tempus.Adjust[minutes: 1, baseTime: patternDate],

seconds => Tempus.Adjust[seconds: 1, baseTime: patternDate],

ENDCASE => Tempus.Adjust[seconds: 1, baseTime: patternDate];

end ¬ Tempus.MakeRope[time: patternDate, precision: patternPrecision];

EXITS

BadPattern => start ¬ end ¬ NIL;

};

Rope.Equal[ptype, "daterange", FALSE] => {

patternDate: BasicTime.GMT;

patternPrecision: Tempus.Precision;

[start, end] ¬ ParseSubrange[pattern];

[patternDate, patternPrecision] ¬ Tempus.Parse[rope: start, search: TRUE ! Tempus.Unintelligible => GOTO BadPattern];

start ¬ Tempus.MakeRope[time: patternDate, precision: patternPrecision];

[patternDate, patternPrecision] ¬ Tempus.Parse[rope: end, search: TRUE ! Tempus.Unintelligible => GOTO BadPattern];

patternDate ¬ SELECT patternPrecision FROM

years => Tempus.Adjust[baseTime: patternDate, years: 1, seconds: -1].time,

months => Tempus.Adjust[baseTime: patternDate, months: 1, seconds: -1].time,

days => Tempus.Adjust[baseTime: patternDate, days: 1, seconds: -1].time,

hours => Tempus.Adjust[baseTime: patternDate, hours: 1, seconds: -1].time,

minutes => Tempus.Adjust[baseTime: patternDate, minutes: 1, seconds: -1].time,

ENDCASE => Tempus.Adjust[baseTime: patternDate, seconds: 1].time;

end ¬ Tempus.MakeRope[time: patternDate, precision: seconds];

EXITS

BadPattern => start ¬ end ¬ NIL;

};

ENDCASE => start ¬ end ¬ NIL;

};

$int => {

SELECT TRUE FROM

Rope.Equal[ptype, "exact", FALSE] =>

start ¬ end ¬ pattern;

Rope.Equal[ptype, "intrange", FALSE] => [start, end] ¬ ParseSubrange[pattern];

ENDCASE => start ¬ end ¬ NIL;

};

ENDCASE => start ¬ end ¬ NIL; -- search complete database

};

Attribute patterns

SyntaxError: PUBLIC ERROR [explanation: ROPE ¬ NIL] = CODE;

ReadAttributePattern: PROC [s: STREAM, prefetch: ROPE ¬ NIL] RETURNS [a: AttributePattern] ~ {

An attribute pattern is of the form: atype(ptype): avalue. The ptype is optional, and there may be whitespace preceeding the avalue. The avalue can be either a token or rope literal.

ENABLE IO.EndOfStream => ERROR SyntaxError["unexpected end of input"];

ch: CHAR;

token: ROPE ¬ prefetch;

IF token = NIL THEN

token ¬ IO.GetTokenRope[s, IO.TokenProc ! IO.EndOfStream => GOTO none].token;

a ¬ NEW[AttributePatternRec];

a.attr.type ¬ Atom.MakeAtom[token];

ch ¬ IO.GetChar[s]; -- attribute separation char or "("

IF ch = '( THEN { -- read pattern type

a.ptype ¬ IO.GetTokenRope[s, IO.TokenProc].token;

ch ¬ IO.GetChar[s];

IF ch # ') THEN ERROR SyntaxError[Rope.Concat["unmatched ()'s: last read ", a.ptype]];

ch ¬ IO.GetChar[s];

};

IF ch # ': THEN ERROR SyntaxError[Rope.Concat["missing colon: last read ", a.ptype]];

[] ¬ IO.SkipWhitespace[stream: s, flushComments: FALSE];

IF IO.PeekChar[s] = '" THEN {

a.attr.value ¬ IO.GetRopeLiteral[s ! IO.Error => ERROR SyntaxError["Malformed rope literal"]];

}

ELSE

a.attr.value ¬ IO.GetTokenRope[s, TokenProc].token;

EXITS

none => RETURN[NIL];

};

ReadAttributePatterns: PUBLIC PROC [s: IO.STREAM] RETURNS [ap: AttributePatterns] ~ {

Read attribute patterns until there are no more and build up a list of them.

a: AttributePattern;

endOfAp: AttributePatterns ¬ NIL;

a ¬ ReadAttributePattern[s];

WHILE a # NIL DO

IF endOfAp = NIL THEN

ap ¬ endOfAp ¬ LIST[a]

ELSE {

endOfAp.rest ¬ LIST[a];

endOfAp ¬ endOfAp.rest;

};

a ¬ ReadAttributePattern[s];

ENDLOOP;

};

WriteAttributePattern: PROC [s: IO.STREAM, a: AttributePattern] RETURNS [] ~ {

IO.PutRope[s, Atom.GetPName[a.attr.type]];

IF a.ptype # NIL THEN IO.PutF1[s, "(%g)", IO.rope[a.ptype]];

IO.PutF1[s, ": ""%g"" ", IO.rope[a.attr.value]];

};

WriteAttributePatterns: PUBLIC PROC [s: IO.STREAM, ap: AttributePatterns] RETURNS [] ~ {

FOR p: AttributePatterns ¬ ap, p.rest WHILE p # NIL DO

WriteAttributePattern[s, p.first];

ENDLOOP;

};

PatternsToEntry: PUBLIC PROC [ap: AttributePatterns] RETURNS [entry: LoganBerry.Entry] ~ {

endOfEntry: LoganBerry.Entry ¬ NIL;

FOR p: AttributePatterns ¬ ap, p.rest WHILE p # NIL DO

IF endOfEntry = NIL THEN

entry ¬ endOfEntry ¬ LIST[p.first.attr]

ELSE {

endOfEntry.rest ¬ LIST[p.first.attr];

endOfEntry ¬ endOfEntry.rest;

};

ENDLOOP;

};

EntryToPatterns: PUBLIC PROC [entry: LoganBerry.Entry] RETURNS [ap: AttributePatterns] ~ {

pattern: AttributePattern;

endOfPattern: AttributePatterns ¬ NIL;

FOR e: LoganBerry.Entry ¬ entry, e.rest WHILE e # NIL DO

pattern ¬ NEW[AttributePatternRec ¬ [attr: e.first]];

IF endOfPattern = NIL THEN

ap ¬ endOfPattern ¬ LIST[pattern]

ELSE {

endOfPattern.rest ¬ LIST[pattern];

endOfPattern ¬ endOfPattern.rest;

};

ENDLOOP;

};

Boolean attribute-based queries

Several LoganQuery classes are composed in various ways to implement a boolean query filter.

BooleanFilterEntries: PUBLIC PROC [input: Cursor, query: ParseTree, inputOrder: LoganBerry.AttributeType, primaryKey: LoganBerry.AttributeType, defaultPattern: PatternType ¬ "DWIM"] RETURNS [output: Cursor] ~ {

Returns a cursor of class $BooleanFilter. Retrievals using this new cursor match the boolean query against entries returned by the input cursor. The query tree should be the result of ParseBooleanQuery. The inputOrder specifies the attribute by which entries on the input cursor are ordered, while primaryKey indicates the attribute for which these entries have unique values. The defaultPattern parameter indicates which type of pattern matching to use if this is not explictly specified in an attribute pattern. The input cursor could be obtained from QueryEntries (or a variety of operations in LoganQueryClass such as GenerateEntries).

Actually there is no $BooleanFilter class. The output cursor returned is of class $Aborter so that queries using this cursor can be aborted.

output ¬ CursorForSubtree[input, query, inputOrder, primaryKey, defaultPattern];

output ¬ LoganQueryClass.EnableAborts[input: output];

};

CursorForSubtree: PROC [input: Cursor, node: ParseNode, order, key: LoganBerry.AttributeType, dp: PatternType] RETURNS [output: Cursor] ~ {

This builds up the cursor structure that implements the boolean query rooted at the given node. It calls itself recursively.

LookupFilter: PROC [ap: AttributePattern, default: PatternType] RETURNS [proc: PatternMatch.MatchProc] ~ {

IF ap.ptype=NIL THEN ap.ptype ¬ default;

IF Rope.Equal[ap.ptype, "DWIM", FALSE] THEN

ap.ptype ¬ PatternMatch.DWIM[ap.attr.value];

proc ¬ PatternMatch.Lookup[ap.ptype];

};

SELECT node.tag FROM

$and => {

For computing ANDs, we simply pipeline the two subquery cursors. That is, the output of one query feeds into the input of the other.

output1: Cursor ¬ CursorForSubtree[input, node.child1, order, key, dp];

output ¬ CursorForSubtree[output1, node.child2, order, key, dp];

};

$or => {

For computing ORs, we duplicate the input cursor, run one instance through one subquery's cursor pipeline and the other through the other subquery's. Then the two cursors are merged back together and duplicates are eliminated.

input1, input2, output1, output2: Cursor;

[input1, input2] ¬ LoganQueryClass.DuplicateEntries[input];

output1 ¬ CursorForSubtree[input1, node.child1, order, key, dp];

output2 ¬ CursorForSubtree[input2, node.child2, order, key, dp];

output ¬ LoganQueryClass.MergeEntries[output1, output2, order];

IF order = key THEN

output ¬ LoganQueryClass.UnDuplicateEntries[output, order];

Note: UnDuplicateEntries wants its input cursor to be ordered by the primary key. If this is not the case, as it may not be, then we do not call UnDuplicateEntries and hence the client may see duplicate entries in the output cursor. This should be fixed someday, perhaps by modifying UnDuplicateEntries to take both an order and key parameter ala SubtractEntries.

};

$not => {

There are two cases of NOTs: In the simple case, the child is simply an attribute pattern; the entries are run through an antifilter. The complex case handles more general subqueries but is more expensive; we duplicate the input cursor, run one instance through the subquery's cursor pipeline and the other directly into a subtractor, which finds those entries that do not match the subquery.

input1, fullinput, output1: Cursor;

IF node.child1.tag = $ap THEN { -- simple case

output ¬ LoganQueryClass.AntiFilterEntries[input: input, pattern: node.child1.ap.attr.value, filter: LookupFilter[node.child1.ap, dp], atype: node.child1.ap.attr.type];

RETURN [output];

};

[input1, fullinput] ¬ LoganQueryClass.DuplicateEntries[input];

output1 ¬ CursorForSubtree[input1, node.child1, order, key, dp];

output ¬ LoganQueryClass.SubtractEntries[fullinput, output1, key, order];

};

$ap => {

In this case, the entries are simply filtered according to the given attribute pattern.

output ¬ LoganQueryClass.FilterEntries[input: input, pattern: node.ap.attr.value, filter: LookupFilter[node.ap, dp], atype: node.ap.attr.type];

};

ENDCASE;

};

Parsing boolean queries

The grammar for boolean queries is as follows:

query ::= boolexpr | boolexpr query

boolexpr ::= term | term "OR" boolexpr

term ::= factor | factor "AND" term

factor ::= attribute-pattern | "NOT" factor | "(" boolexpr ")"

attribute-pattern ::= attribute-type ":" attribute-value |
attribute-type "(" pattern ")" ":" attribute-value

A recursive decent parser is used to parse queries that comply with this grammar. For each non-terminal in the grammar there is a parse routine defined below that accepts the right-hand-side of the production from the input stream and returns a parse tree for that portion of the query.

TokenStream: TYPE = REF TokenStreamRec;

TokenStreamRec: TYPE = RECORD [

stream: STREAM,

prefetch: ROPE ← NIL

];

ParseBooleanQuery: PUBLIC PROC [s: IO.STREAM] RETURNS [tree: ParseTree] ~ {

Returns the parse tree for the query read from the given stream. Raises SyntaxError if the query is not a proper boolean query.

ts: TokenStream ¬ NEW[TokenStreamRec ¬ [s]];

tree ¬ GetQuery[ts];

};

GetQuery: PROC [ts: TokenStream] RETURNS [tree: ParseTree] ~ {

query ::= boolexpr | boolexpr query

subTree: ParseTree ¬ GetBoolexpr[ts];

IF PeekToken[ts] # NIL THEN {

tree ¬ NEW[ParseNodeRec ¬ [tag: $and]];

tree.child1 ¬ subTree;

tree.child2 ¬ GetQuery[ts];

} ELSE {

tree ¬ subTree;

};

GetBoolexpr: PROC [ts: TokenStream] RETURNS [tree: ParseTree] ~ {

boolexpr ::= term | term "OR" boolexpr

subTree: ParseTree ¬ GetTerm[ts];

IF Rope.Equal[PeekToken[ts], "OR", FALSE] THEN {

[] ¬ NextToken[ts];

tree ¬ NEW[ParseNodeRec ¬ [tag: $or]];

tree.child1 ¬ subTree;

tree.child2 ¬ GetBoolexpr[ts];

} ELSE {

tree ¬ subTree;

};

GetTerm: PROC [ts: TokenStream] RETURNS [tree: ParseTree] ~ {

term ::= factor | factor "AND" term

subTree: ParseTree ¬ GetFactor[ts];

IF Rope.Equal[PeekToken[ts], "AND", FALSE] THEN {

[] ¬ NextToken[ts];

tree ¬ NEW[ParseNodeRec ¬ [tag: $and]];

tree.child1 ¬ subTree;

tree.child2 ¬ GetTerm[ts];

} ELSE {

tree ¬ subTree;

};

GetFactor: PROC [ts: TokenStream] RETURNS [tree: ParseTree] ~ {

factor ::= attribute-pattern | "NOT" factor | "(" boolexpr ")"

token: ROPE ¬ PeekToken[ts];

SELECT TRUE FROM

Rope.Equal[token, "NOT", FALSE] => {

token ¬ NextToken[ts];

tree ¬ NEW[ParseNodeRec ¬ [tag: $not]];

tree.child1 ¬ GetFactor[ts];

};

Rope.Equal[token, "("] => {

token ¬ NextToken[ts];

tree ¬ GetBoolexpr[ts];

token ¬ NextToken[ts];

IF NOT Rope.Equal[token, ")"] THEN

ERROR SyntaxError["Unmatched parenthesis"];

};

ENDCASE => { -- attribute-pattern

tree ¬ GetAttrributePattern[ts];

};

GetAttrributePattern: PROC [ts: TokenStream] RETURNS [tree: ParseTree] ~ {

attribute-pattern ::= attribute-type ":" attribute-value |
attribute-type "(" pattern ")" ":" attribute-value

tree ¬ NEW[ParseNodeRec ¬ [tag: $ap]];

tree.ap ¬ ReadAttributePattern[ts.stream, ts.prefetch];

IF tree.ap = NIL THEN

ERROR SyntaxError["Unexpected end of input"];

ts.prefetch ¬ NIL;

};

TokenProc: IO.BreakProc ~ {

RETURN[SELECT char FROM

IN [Ascii.NUL .. Ascii.SP] => sepr,

'(, '), ': => break,

ENDCASE => other];

};

NextToken: PROC [ts: TokenStream] RETURNS [token: ROPE] ~ {

IF ts.prefetch # NIL

THEN token ¬ ts.prefetch

ELSE token ¬ IO.GetTokenRope[ts.stream, TokenProc ! IO.EndOfStream => {token ¬ NIL; CONTINUE}].token;

ts.prefetch ¬ NIL;

};

PeekToken: PROC [ts: TokenStream] RETURNS [token: ROPE] ~ {

IF ts.prefetch = NIL THEN

ts.prefetch ¬ IO.GetTokenRope[ts.stream, TokenProc ! IO.EndOfStream => CONTINUE].token;

token ¬ ts.prefetch;

};

Aborting queries

AbortQuery: PUBLIC PROC [cursor: Cursor] ~ {

If a process is currently executing a NextEntry operation on the given cursor, the process is aborted and the operation returns NIL; any subsequent calls to NextEntry will also return NIL (i.e. the cursor is no longer useable). This can be used on cursors returned by either QueryEntries or BooleanFilterEntries.

LoganQueryClass.AbortQuery[cursor];

};

Miscellaneous

ParseSubrange: PROC [r: ROPE] RETURNS [start, end: ROPE ¬ NIL] ~ {

r should be of the form "start-end".

ENABLE IO.EndOfStream, IO.Error => GOTO Bad;

ToDash: IO.BreakProc = {

[char: CHAR] RETURNS [IO.CharClass]

RETURN[SELECT char FROM

'- => sepr,

ENDCASE => other];

};

s: IO.STREAM ¬ IO.RIS[r];

start ¬ IF IO.PeekChar[s] = '" THEN IO.GetRopeLiteral[s] ELSE IO.GetTokenRope[s, ToDash].token;

IF IO.GetChar[s] # '- THEN GOTO Bad;

end ¬ IF IO.PeekChar[s] = '" THEN IO.GetRopeLiteral[s] ELSE IO.GetLineRope[s];

EXITS

Bad => ERROR SyntaxError["Not a subrange"];

};

END.