DIRECTORY Basics USING [charsPerWord, UnsafeBlock], BasicTime USING [GetClockPulses, Pulses, PulsesToSeconds], Commander USING [CommandProc, Register], CommandTool USING [Failed, ParseToList], FS USING [EnumerateForNames, Error, NameProc, StreamOpen], IO USING [Close, EndOf, PutF, PutFR, PutRope, STREAM, UnsafeGetBlock, UnsafePutBlock], PrincOpsUtils USING [LongCopy], Process USING [CheckForAbort], Rope USING [Concat, Fetch, Length, ROPE, ToRefText], VM USING [AddressForPageNumber, Allocate, bytesPerPage, Free, Interval]; QFind: CEDAR PROGRAM IMPORTS BasicTime, Commander, CommandTool, FS, IO, PrincOpsUtils, Process, Rope, VM = { STREAM: TYPE = IO.STREAM; ROPE: TYPE = Rope.ROPE; overlap: CARDINAL = 150; -- keep this many chars from previous buffer so we can try to back up to beginning of line when we find a match (must be even) bufferPages: NAT = 40; bufferSize: CARDINAL = bufferPages*VM.bytesPerPage; Buffer: TYPE = RECORD [PACKED SEQUENCE COMPUTED CARDINAL OF CHAR]; Map: TYPE = PACKED ARRAY CHAR OF CHAR; OpenFile: PROC [name: ROPE] RETURNS [st: STREAM] = { st _ FS.StreamOpen[name, $read ! FS.Error => IF error.group # bug THEN CONTINUE]}; SearchListOfFiles: Commander.CommandProc = { log: IO.STREAM _ cmd.err; out: IO.STREAM _ cmd.out; inputList: LIST OF ROPE; buffer: LONG POINTER TO Buffer; vmInt: VM.Interval; key, file: ROPE; bm: BMhandle; abort: BOOL; matches, files: INT _ 0; startTime: BasicTime.Pulses _ BasicTime.GetClockPulses[]; map: REF Map; caseless: BOOL _ TRUE; tkey: REF TEXT; SearchFile: FS.NameProc = { fMatches: INT; [abort: abort, matches: fMatches] _ BoyerMooreSearch[file: fullFName, bm: bm, buffer: buffer, results: out, map: map]; files _ files + 1; matches _ matches + fMatches; continue _ ~abort; }; inputList _ CommandTool.ParseToList[cmd: cmd, starExpand: FALSE, switchChar: '- ! CommandTool.Failed => {log.PutRope["invalid input format\n"]; GO TO quit}].list; IF inputList = NIL THEN RETURN[NIL, "No input specified"]; map _ NEW [Map]; FOR c: CHAR IN CHAR DO map[c] _ IF c IN ['a..'z] THEN c - 'a + 'A ELSE c; ENDLOOP; DO key _ inputList.first; inputList _ inputList.rest; IF Rope.Fetch[key, 0] = '- THEN { SELECT Rope.Fetch[key, 1] FROM 'c, 'C => FOR c: CHAR IN ['a..'z] DO map[c] _ c ENDLOOP; ENDCASE => { log.PutF["invalid switch: $g", [character[Rope.Fetch[key, 1]]]]; GO TO quit}; LOOP}; EXIT; ENDLOOP; tkey _ Rope.ToRefText[key]; FOR i: CARDINAL IN [0..tkey.length) DO tkey[i] _ map[tkey[i]]; ENDLOOP; bm _ MakeFailureFunctions[tkey]; vmInt _ VM.Allocate[bufferPages]; buffer _ LOOPHOLE[VM.AddressForPageNumber[vmInt.page]]; WHILE inputList # NIL DO ENABLE UNWIND => TRUSTED { --give buffer back VM.Free[vmInt]}; pattern: ROPE _ DefaultToHighestGeneration[inputList.first]; file _ inputList.first; inputList _ inputList.rest; FS.EnumerateForNames[pattern, SearchFile ! FS.Error => IF error.group # bug THEN { out.PutF["** %g\n", [rope[error.explanation]] ]; LOOP}; ]; IF abort THEN EXIT; ENDLOOP; TRUSTED {VM.Free[vmInt]}; IF abort THEN out.PutRope["\nABORTED\n"]; RETURN[ IF abort THEN $aborted ELSE NIL, IO.PutFR["%g files, %g matches, %g seconds", [integer[files]], [integer[matches]], [real[BasicTime.PulsesToSeconds[BasicTime.GetClockPulses[] - startTime]]]]]; EXITS quit => RETURN[$Failure, NIL]; }; DefaultToHighestGeneration: PROC [filePattern: ROPE] RETURNS [ROPE] = { len: INT _ Rope.Length[filePattern]; bang: INT _ len; star: INT _ len; dot: INT _ len; pos: INT _ len; WHILE pos > 0 DO c: CHAR _ Rope.Fetch[filePattern, pos _ pos - 1]; SELECT c FROM '! => bang _ pos; '. => {dot _ pos; EXIT}; '* => IF star = len THEN star _ pos; '>, '] => EXIT; ENDCASE; ENDLOOP; SELECT TRUE FROM dot = len AND star = len AND bang = len => filePattern _ Rope.Concat[filePattern, ".mesa!h"]; bang = len => filePattern _ Rope.Concat[filePattern, "!h"]; ENDCASE; RETURN[filePattern]; }; Delta1: TYPE = ARRAY CHAR OF CARDINAL; Delta2: TYPE = RECORD [SEQUENCE COMPUTED CARDINAL OF CARDINAL]; BMhandle: TYPE = REF BMdata; BMdata: TYPE = RECORD [ pattern: REF TEXT, delta1: REF Delta1, delta2: REF Delta2]; MakeFailureFunctions: PROCEDURE [key: REF TEXT] RETURNS [BMhandle] = { c: CARDINAL; length: CARDINAL = key.length; t: CARDINAL _ length; delta1: REF Delta1 = NEW[Delta1]; delta2: REF Delta2 = NEW[Delta2[length]]; aux: REF Delta2 _ NEW[Delta2[length]]; FOR ch: CHAR IN CHAR DO delta1[ch] _ length+1 ENDLOOP; FOR c IN [0..length) DO delta2[c] _ length + (delta1[key[c]] _ length - c); ENDLOOP; FOR c DECREASING IN [0..length) DO aux[c] _ t; WHILE t < length AND key[c] # key[t] DO t _ aux[t]; ENDLOOP; t _ t - 1; delta2[t] _ MIN[delta2[t], length - (c-1)]; ENDLOOP; FOR c IN [0..t] DO delta2[c] _ MIN[delta2[c], length+1+t-c] ENDLOOP; RETURN [NEW[BMdata _ [pattern: key, delta1: delta1, delta2: delta2]]]}; BoyerMooreSearch: PROC [file: ROPE, bm: BMhandle, buffer: LONG POINTER TO Buffer, results: STREAM, map: REF Map] RETURNS [abort: BOOLEAN _ FALSE, matches: LONG CARDINAL _ 0] = TRUSTED { ENABLE ABORTED => GO TO aborted; stream: STREAM; checkAbort: PROC RETURNS [a: BOOL _ FALSE] = TRUSTED { Process.CheckForAbort[!ABORTED => {a _ TRUE; CONTINUE}]}; firstMatch: BOOL _ TRUE; delta1: REF Delta1 = bm.delta1; delta2: REF Delta2 = bm.delta2; pattern: REF TEXT = bm.pattern; length: CARDINAL = pattern.length; stopIndexPlusOne: INT _ bufferSize-overlap; block: Basics.UnsafeBlock _ [LOOPHOLE[buffer], 0, stopIndexPlusOne - 0]; index: CARDINAL _ length; bytes: LONG CARDINAL _ 0; winning: CARDINAL _ 0; -- if # 0, we're in the middle of printing a match, and are willing to scan this many more chars looking for end of line BEGIN -- for ENABLE purposes ENABLE ABORTED => IF stream # NIL THEN stream.Close[]; stream _ OpenFile[file]; IF stream = NIL THEN { results.PutF["%g cannot be opened\n", [rope[file]]]; Process.CheckForAbort[]; -- may raise ABORTED caught above RETURN[FALSE, 0]}; DO UNTIL index <= block.startIndex DO -- get enough characters to look at got: CARDINAL; IF stream.EndOf[] OR abort OR (abort _ checkAbort[]) THEN {IF winning # 0 THEN results.PutRope["\n"]; stream.Close[]; RETURN}; IF block.startIndex = stopIndexPlusOne THEN { PrincOpsUtils.LongCopy[ to: buffer, from: buffer + (stopIndexPlusOne-overlap) / Basics.charsPerWord, nwords: overlap/Basics.charsPerWord]; index _ index - (stopIndexPlusOne-overlap); block.startIndex _ overlap; stopIndexPlusOne _ bufferSize}; block.count _ stopIndexPlusOne - block.startIndex; got _ stream.UnsafeGetBlock[block]; block.startIndex _ block.startIndex + got; bytes _ bytes + got; IF winning # 0 THEN [index, winning] _ ReportMatch[results, block, index, winning]; ENDLOOP; FOR keyIndex: CARDINAL DECREASING IN [0..length) DO IF map[buffer[index _ index-1]] # pattern[keyIndex] THEN { index _ index + MAX[delta1[map[buffer[index]]], delta2[keyIndex]]; EXIT}; REPEAT FINISHED => { -- found a match matches _ matches + 1; IF firstMatch THEN { firstMatch _ FALSE; results.PutF["%g\n", [rope[file]]]}; [index, winning] _ ReportMatch[ results, block, index --+length-1--, 0, bytes+index --+length-1-- -block.startIndex]; index _ (IF abort _ checkAbort[] THEN block.startIndex ELSE index) + 2}; ENDLOOP; ENDLOOP; END; -- of nested ABORTED catchphrase EXITS aborted => {RETURN [TRUE, matches]}; }; ReportMatch: PROC [ report: STREAM, block: Basics.UnsafeBlock, at, limit: INT, total: INT _ 0] RETURNS [lastUsed, waitingForCR: CARDINAL] = TRUSTED { extend: INT = IF limit = 0 THEN overlap ELSE limit; backTo: INT = MAX[at, extend] - extend; forwardTo: INT = MIN[at+extend, block.startIndex]; IF limit = 0 THEN { -- initial match as opposed to later search for closing CR report.PutF["%5g: ", [cardinal[total]]]; FOR cr: INT DECREASING IN [backTo..at) DO IF block.base[cr] = ('\n-0C) THEN { report.UnsafePutBlock[[block.base, cr+1, at - cr-1]]; EXIT}; REPEAT FINISHED => { IF total > extend THEN report.PutRope["..."]; report.UnsafePutBlock[[block.base, backTo, at - backTo]]}; ENDLOOP}; FOR cr: INT IN [at..forwardTo) DO IF block.base[cr] = ('\n-0C) THEN { report.UnsafePutBlock[[block.base, at, cr+1 - at]]; RETURN [cr, 0]}; ENDLOOP; report.UnsafePutBlock[[block.base, at, forwardTo - at]]; IF (waitingForCR _ at+extend - forwardTo) = 0 THEN report.PutRope["...\n"]; lastUsed _ forwardTo - 1}; Commander.Register[ "QFind", SearchListOfFiles, "Search list of files for occurrances of a string token usage: QFind [-c] "]; }. œQFind.mesa Copyright c 1985 by Xerox Corporation. All rights reserved. Sweet December 5, 1985 10:52:28 am PST [cmd: Commander.Handle] RETURNS [result: REF ANY _ NIL, msg: ROPE _ NIL] ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- Boyer-Moore parsing: construct tables of offsets for when match fails ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- See Knuth, Morris, Pratt, "Fast Pattern...", SIAM J. Comp, June 1977 1/12/84:RSS Above algorithm (dd') fails for patterns in which all characters are identical. Changed implementation to calculate dd instead of dd' in p342 algorithm. block points to remaining empty buffer beyond file contents currently there ReportMatch ignores the count field of block ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- Command Line Registration ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- Κ ˜codešœ ™ Kšœ Οmœ1™žœžœ ˜P—Kš žœ žœžœžœžœ˜:Kšœžœ˜š žœžœžœžœž˜Kš œ žœžœ žœ žœ˜2Kšžœ˜—šžœ˜Kšœ2˜2šžœžœ˜!šžœž˜Kš œ žœžœžœ žœ žœ˜8šžœ˜ KšœAžœžœ˜M——Kšžœ˜—Kšžœ˜Kšžœ˜—Kšœ˜šžœžœžœž˜&K˜Kšžœ˜—Kšœ ˜ Kšœžœ˜!Kšœ žœžœ#˜7šžœ žœž˜šžœ˜šžœžœŸ˜&Kšžœ˜——Kšœ žœ/˜