<> <> <> <<>> DIRECTORY Basics USING [charsPerWord, UnsafeBlock], BasicTime USING [GetClockPulses, Pulses, PulsesToSeconds], Commander USING [CommandProc, Register], CommandTool USING [Failed, ParseToList], FS USING [EnumerateForNames, Error, NameProc, StreamOpen], IO USING [Close, EndOf, PutF, PutFR, PutRope, STREAM, UnsafeGetBlock, UnsafePutBlock], PrincOpsUtils USING [LongCopy], Process USING [CheckForAbort], Rope USING [Concat, Fetch, Length, ROPE, ToRefText], VM USING [AddressForPageNumber, Allocate, bytesPerPage, Free, Interval]; QFind: CEDAR PROGRAM IMPORTS BasicTime, Commander, CommandTool, FS, IO, PrincOpsUtils, Process, Rope, VM = { STREAM: TYPE = IO.STREAM; ROPE: TYPE = Rope.ROPE; overlap: CARDINAL = 150; -- keep this many chars from previous buffer so we can try to back up to beginning of line when we find a match (must be even) bufferPages: NAT = 40; bufferSize: CARDINAL = bufferPages*VM.bytesPerPage; Buffer: TYPE = RECORD [PACKED SEQUENCE COMPUTED CARDINAL OF CHAR]; Map: TYPE = PACKED ARRAY CHAR OF CHAR; OpenFile: PROC [name: ROPE] RETURNS [st: STREAM] = { st _ FS.StreamOpen[name, $read ! FS.Error => IF error.group # bug THEN CONTINUE]}; SearchListOfFiles: Commander.CommandProc = { <<[cmd: Commander.Handle] RETURNS [result: REF ANY _ NIL, msg: ROPE _ NIL]>> log: IO.STREAM _ cmd.err; out: IO.STREAM _ cmd.out; inputList: LIST OF ROPE; buffer: LONG POINTER TO Buffer; vmInt: VM.Interval; key, file: ROPE; bm: BMhandle; abort: BOOL; matches, files: INT _ 0; startTime: BasicTime.Pulses _ BasicTime.GetClockPulses[]; map: REF Map; caseless: BOOL _ TRUE; tkey: REF TEXT; SearchFile: FS.NameProc = { fMatches: INT; [abort: abort, matches: fMatches] _ BoyerMooreSearch[file: fullFName, bm: bm, buffer: buffer, results: out, map: map]; files _ files + 1; matches _ matches + fMatches; continue _ ~abort; }; inputList _ CommandTool.ParseToList[cmd: cmd, starExpand: FALSE, switchChar: '- ! CommandTool.Failed => {log.PutRope["invalid input format\n"]; GO TO quit}].list; IF inputList = NIL THEN RETURN[NIL, "No input specified"]; map _ NEW [Map]; FOR c: CHAR IN CHAR DO map[c] _ IF c IN ['a..'z] THEN c - 'a + 'A ELSE c; ENDLOOP; DO key _ inputList.first; inputList _ inputList.rest; IF Rope.Fetch[key, 0] = '- THEN { SELECT Rope.Fetch[key, 1] FROM 'c, 'C => FOR c: CHAR IN ['a..'z] DO map[c] _ c ENDLOOP; ENDCASE => { log.PutF["invalid switch: $g", [character[Rope.Fetch[key, 1]]]]; GO TO quit}; LOOP}; EXIT; ENDLOOP; tkey _ Rope.ToRefText[key]; FOR i: CARDINAL IN [0..tkey.length) DO tkey[i] _ map[tkey[i]]; ENDLOOP; bm _ MakeFailureFunctions[tkey]; vmInt _ VM.Allocate[bufferPages]; buffer _ LOOPHOLE[VM.AddressForPageNumber[vmInt.page]]; WHILE inputList # NIL DO ENABLE UNWIND => TRUSTED { --give buffer back VM.Free[vmInt]}; pattern: ROPE _ DefaultToHighestGeneration[inputList.first]; file _ inputList.first; inputList _ inputList.rest; FS.EnumerateForNames[pattern, SearchFile ! FS.Error => IF error.group # bug THEN { out.PutF["** %g\n", [rope[error.explanation]] ]; LOOP}; ]; IF abort THEN EXIT; ENDLOOP; TRUSTED {VM.Free[vmInt]}; IF abort THEN out.PutRope["\nABORTED\n"]; RETURN[ IF abort THEN $aborted ELSE NIL, IO.PutFR["%g files, %g matches, %g seconds", [integer[files]], [integer[matches]], [real[BasicTime.PulsesToSeconds[BasicTime.GetClockPulses[] - startTime]]]]]; EXITS quit => RETURN[$Failure, NIL]; }; DefaultToHighestGeneration: PROC [filePattern: ROPE] RETURNS [ROPE] = { len: INT _ Rope.Length[filePattern]; bang: INT _ len; star: INT _ len; dot: INT _ len; pos: INT _ len; WHILE pos > 0 DO c: CHAR _ Rope.Fetch[filePattern, pos _ pos - 1]; SELECT c FROM '! => bang _ pos; '. => {dot _ pos; EXIT}; '* => IF star = len THEN star _ pos; '>, '] => EXIT; ENDCASE; ENDLOOP; SELECT TRUE FROM dot = len AND star = len AND bang = len => filePattern _ Rope.Concat[filePattern, ".mesa!h"]; bang = len => filePattern _ Rope.Concat[filePattern, "!h"]; ENDCASE; RETURN[filePattern]; }; <<---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---->> <> <<---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---->> Delta1: TYPE = ARRAY CHAR OF CARDINAL; Delta2: TYPE = RECORD [SEQUENCE COMPUTED CARDINAL OF CARDINAL]; BMhandle: TYPE = REF BMdata; BMdata: TYPE = RECORD [ pattern: REF TEXT, delta1: REF Delta1, delta2: REF Delta2]; MakeFailureFunctions: PROCEDURE [key: REF TEXT] RETURNS [BMhandle] = { <> <<1/12/84:RSS Above algorithm (dd') fails for patterns in which all characters are identical. Changed implementation to calculate dd instead of dd' in p342 algorithm.>> c: CARDINAL; length: CARDINAL = key.length; t: CARDINAL _ length; delta1: REF Delta1 = NEW[Delta1]; delta2: REF Delta2 = NEW[Delta2[length]]; aux: REF Delta2 _ NEW[Delta2[length]]; FOR ch: CHAR IN CHAR DO delta1[ch] _ length+1 ENDLOOP; FOR c IN [0..length) DO delta2[c] _ length + (delta1[key[c]] _ length - c); ENDLOOP; FOR c DECREASING IN [0..length) DO aux[c] _ t; WHILE t < length AND key[c] # key[t] DO t _ aux[t]; ENDLOOP; t _ t - 1; delta2[t] _ MIN[delta2[t], length - (c-1)]; ENDLOOP; FOR c IN [0..t] DO delta2[c] _ MIN[delta2[c], length+1+t-c] ENDLOOP; RETURN [NEW[BMdata _ [pattern: key, delta1: delta1, delta2: delta2]]]}; BoyerMooreSearch: PROC [file: ROPE, bm: BMhandle, buffer: LONG POINTER TO Buffer, results: STREAM, map: REF Map] RETURNS [abort: BOOLEAN _ FALSE, matches: LONG CARDINAL _ 0] = TRUSTED { ENABLE ABORTED => GO TO aborted; stream: STREAM; checkAbort: PROC RETURNS [a: BOOL _ FALSE] = TRUSTED { Process.CheckForAbort[!ABORTED => {a _ TRUE; CONTINUE}]}; firstMatch: BOOL _ TRUE; delta1: REF Delta1 = bm.delta1; delta2: REF Delta2 = bm.delta2; pattern: REF TEXT = bm.pattern; length: CARDINAL = pattern.length; stopIndexPlusOne: INT _ bufferSize-overlap; block: Basics.UnsafeBlock _ [LOOPHOLE[buffer], 0, stopIndexPlusOne - 0]; <> <> index: CARDINAL _ length; bytes: LONG CARDINAL _ 0; winning: CARDINAL _ 0; -- if # 0, we're in the middle of printing a match, and are willing to scan this many more chars looking for end of line BEGIN -- for ENABLE purposes ENABLE ABORTED => IF stream # NIL THEN stream.Close[]; stream _ OpenFile[file]; IF stream = NIL THEN { results.PutF["%g cannot be opened\n", [rope[file]]]; Process.CheckForAbort[]; -- may raise ABORTED caught above RETURN[FALSE, 0]}; DO UNTIL index <= block.startIndex DO -- get enough characters to look at got: CARDINAL; IF stream.EndOf[] OR abort OR (abort _ checkAbort[]) THEN {IF winning # 0 THEN results.PutRope["\n"]; stream.Close[]; RETURN}; IF block.startIndex = stopIndexPlusOne THEN { PrincOpsUtils.LongCopy[ to: buffer, from: buffer + (stopIndexPlusOne-overlap) / Basics.charsPerWord, nwords: overlap/Basics.charsPerWord]; index _ index - (stopIndexPlusOne-overlap); block.startIndex _ overlap; stopIndexPlusOne _ bufferSize}; block.count _ stopIndexPlusOne - block.startIndex; got _ stream.UnsafeGetBlock[block]; block.startIndex _ block.startIndex + got; bytes _ bytes + got; IF winning # 0 THEN [index, winning] _ ReportMatch[results, block, index, winning]; ENDLOOP; FOR keyIndex: CARDINAL DECREASING IN [0..length) DO IF map[buffer[index _ index-1]] # pattern[keyIndex] THEN { index _ index + MAX[delta1[map[buffer[index]]], delta2[keyIndex]]; EXIT}; REPEAT FINISHED => { -- found a match matches _ matches + 1; IF firstMatch THEN { firstMatch _ FALSE; results.PutF["%g\n", [rope[file]]]}; [index, winning] _ ReportMatch[ results, block, index --+length-1--, 0, bytes+index --+length-1-- -block.startIndex]; index _ (IF abort _ checkAbort[] THEN block.startIndex ELSE index) + 2}; ENDLOOP; ENDLOOP; END; -- of nested ABORTED catchphrase EXITS aborted => {RETURN [TRUE, matches]}; }; ReportMatch: PROC [ report: STREAM, block: Basics.UnsafeBlock, at, limit: INT, total: INT _ 0] RETURNS [lastUsed, waitingForCR: CARDINAL] = TRUSTED { extend: INT = IF limit = 0 THEN overlap ELSE limit; backTo: INT = MAX[at, extend] - extend; forwardTo: INT = MIN[at+extend, block.startIndex]; IF limit = 0 THEN { -- initial match as opposed to later search for closing CR report.PutF["%5g: ", [cardinal[total]]]; FOR cr: INT DECREASING IN [backTo..at) DO IF block.base[cr] = ('\n-0C) THEN { report.UnsafePutBlock[[block.base, cr+1, at - cr-1]]; EXIT}; REPEAT FINISHED => { IF total > extend THEN report.PutRope["..."]; report.UnsafePutBlock[[block.base, backTo, at - backTo]]}; ENDLOOP}; FOR cr: INT IN [at..forwardTo) DO IF block.base[cr] = ('\n-0C) THEN { report.UnsafePutBlock[[block.base, at, cr+1 - at]]; RETURN [cr, 0]}; ENDLOOP; report.UnsafePutBlock[[block.base, at, forwardTo - at]]; IF (waitingForCR _ at+extend - forwardTo) = 0 THEN report.PutRope["...\n"]; lastUsed _ forwardTo - 1}; <<---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---->> <> <<---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---->> <<>> Commander.Register[ "QFind", SearchListOfFiles, "Search list of files for occurrances of a string token usage: QFind [-c] "]; }.