DIRECTORY
Basics USING [charsPerWord, UnsafeBlock],
BasicTime USING [GetClockPulses, Pulses, PulsesToSeconds],
Commander USING [CommandProc, Register],
CommandTool USING [Failed, ParseToList],
FS USING [EnumerateForNames, Error, NameProc, StreamOpen],
IO USING [Close, EndOf, PutF, PutFR, PutRope, STREAM, UnsafeGetBlock, UnsafePutBlock],
PrincOpsUtils USING [LongCopy],
Process USING [CheckForAbort],
Rope USING [Concat, Fetch, Length, ROPE, ToRefText],
VM USING [AddressForPageNumber, Allocate, bytesPerPage, Free, Interval];
QFind:
CEDAR
PROGRAM
IMPORTS BasicTime, Commander, CommandTool, FS, IO, PrincOpsUtils, Process, Rope, VM = {
STREAM: TYPE = IO.STREAM;
ROPE: TYPE = Rope.ROPE;
overlap: CARDINAL = 150; -- keep this many chars from previous buffer so we can try to back up to beginning of line when we find a match (must be even)
bufferPages: NAT = 40;
bufferSize: CARDINAL = bufferPages*VM.bytesPerPage;
Buffer: TYPE = RECORD [PACKED SEQUENCE COMPUTED CARDINAL OF CHAR];
Map: TYPE = PACKED ARRAY CHAR OF CHAR;
OpenFile:
PROC [name:
ROPE]
RETURNS [st:
STREAM] = {
st ←
FS.StreamOpen[name, $read
! FS.Error => IF error.group # bug THEN CONTINUE]};
SearchListOfFiles: Commander.CommandProc = {
[cmd: Commander.Handle] RETURNS [result: REF ANY ← NIL, msg: ROPE ← NIL]
log: IO.STREAM ← cmd.err;
out: IO.STREAM ← cmd.out;
inputList: LIST OF ROPE;
buffer: LONG POINTER TO Buffer;
vmInt: VM.Interval;
key, file: ROPE;
bm: BMhandle;
abort: BOOL;
matches, files: INT ← 0;
startTime: BasicTime.Pulses ← BasicTime.GetClockPulses[];
map: REF Map;
caseless: BOOL ← TRUE;
tkey: REF TEXT;
SearchFile:
FS.NameProc = {
fMatches: INT;
[abort: abort, matches: fMatches] ← BoyerMooreSearch[file: fullFName, bm: bm, buffer: buffer, results: out, map: map];
files ← files + 1;
matches ← matches + fMatches;
continue ← ~abort;
};
inputList ← CommandTool.ParseToList[cmd: cmd, starExpand:
FALSE, switchChar: '- !
CommandTool.Failed => {log.PutRope["invalid input format\n"]; GO TO quit}].list;
IF inputList = NIL THEN RETURN[NIL, "No input specified"];
map ← NEW [Map];
FOR c:
CHAR
IN
CHAR
DO
map[c] ← IF c IN ['a..'z] THEN c - 'a + 'A ELSE c;
ENDLOOP;
DO
key ← inputList.first; inputList ← inputList.rest;
IF Rope.Fetch[key, 0] = '-
THEN {
SELECT Rope.Fetch[key, 1]
FROM
'c, 'C => FOR c: CHAR IN ['a..'z] DO map[c] ← c ENDLOOP;
ENDCASE => {
log.PutF["invalid switch: $g", [character[Rope.Fetch[key, 1]]]]; GO TO quit};
LOOP};
EXIT;
ENDLOOP;
tkey ← Rope.ToRefText[key];
FOR i:
CARDINAL
IN [0..tkey.length)
DO
tkey[i] ← map[tkey[i]];
ENDLOOP;
bm ← MakeFailureFunctions[tkey];
vmInt ← VM.Allocate[bufferPages];
buffer ← LOOPHOLE[VM.AddressForPageNumber[vmInt.page]];
WHILE inputList #
NIL
DO
ENABLE
UNWIND =>
TRUSTED {
--give buffer back
VM.Free[vmInt]};
pattern: ROPE ← DefaultToHighestGeneration[inputList.first];
file ← inputList.first; inputList ← inputList.rest;
FS.EnumerateForNames[pattern, SearchFile
!
FS.Error =>
IF error.group # bug
THEN {
out.PutF["** %g\n", [rope[error.explanation]] ];
LOOP};
];
IF abort THEN EXIT;
ENDLOOP;
TRUSTED {VM.Free[vmInt]};
IF abort THEN out.PutRope["\nABORTED\n"];
RETURN[
IF abort THEN $aborted ELSE NIL,
IO.PutFR["%g files, %g matches, %g seconds", [integer[files]], [integer[matches]], [real[BasicTime.PulsesToSeconds[BasicTime.GetClockPulses[] - startTime]]]]];
EXITS
quit => RETURN[$Failure, NIL];
};
DefaultToHighestGeneration:
PROC [filePattern:
ROPE]
RETURNS [
ROPE] = {
len: INT ← Rope.Length[filePattern];
bang: INT ← len;
star: INT ← len;
dot: INT ← len;
pos: INT ← len;
WHILE pos > 0
DO
c: CHAR ← Rope.Fetch[filePattern, pos ← pos - 1];
SELECT c
FROM
'! => bang ← pos;
'. => {dot ← pos; EXIT};
'* => IF star = len THEN star ← pos;
'>, '] => EXIT;
ENDCASE;
ENDLOOP;
SELECT
TRUE
FROM
dot = len
AND star = len
AND bang = len =>
filePattern ← Rope.Concat[filePattern, ".mesa!h"];
bang = len =>
filePattern ← Rope.Concat[filePattern, "!h"];
ENDCASE;
RETURN[filePattern];
};
---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
Boyer-Moore parsing: construct tables of offsets for when match fails
---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
Delta1: TYPE = ARRAY CHAR OF CARDINAL;
Delta2:
TYPE =
RECORD [
SEQUENCE
COMPUTED
CARDINAL
OF
CARDINAL];
BMhandle: TYPE = REF BMdata;
BMdata:
TYPE =
RECORD [
pattern: REF TEXT,
delta1: REF Delta1,
delta2: REF Delta2];
MakeFailureFunctions:
PROCEDURE [key:
REF
TEXT]
RETURNS [BMhandle] = {
See Knuth, Morris, Pratt, "Fast Pattern...", SIAM J. Comp, June 1977
1/12/84:RSS Above algorithm (dd') fails for patterns in which all characters are identical. Changed implementation to calculate dd instead of dd' in p342 algorithm.
c: CARDINAL;
length: CARDINAL = key.length;
t: CARDINAL ← length;
delta1: REF Delta1 = NEW[Delta1];
delta2: REF Delta2 = NEW[Delta2[length]];
aux: REF Delta2 ← NEW[Delta2[length]];
FOR ch: CHAR IN CHAR DO delta1[ch] ← length+1 ENDLOOP;
FOR c
IN [0..length)
DO
delta2[c] ← length + (delta1[key[c]] ← length - c);
ENDLOOP;
FOR c
DECREASING
IN [0..length)
DO
aux[c] ← t;
WHILE t < length
AND key[c] # key[t]
DO
t ← aux[t];
ENDLOOP;
t ← t - 1;
delta2[t] ← MIN[delta2[t], length - (c-1)];
ENDLOOP;
FOR c IN [0..t] DO delta2[c] ← MIN[delta2[c], length+1+t-c] ENDLOOP;
RETURN [NEW[BMdata ← [pattern: key, delta1: delta1, delta2: delta2]]]};
BoyerMooreSearch:
PROC [file:
ROPE, bm: BMhandle, buffer:
LONG
POINTER
TO Buffer, results:
STREAM, map:
REF Map]
RETURNS [abort: BOOLEAN ← FALSE, matches: LONG CARDINAL ← 0] = TRUSTED {
ENABLE ABORTED => GO TO aborted;
stream: STREAM;
checkAbort:
PROC
RETURNS [a:
BOOL ←
FALSE] =
TRUSTED {
Process.CheckForAbort[!ABORTED => {a ← TRUE; CONTINUE}]};
firstMatch: BOOL ← TRUE;
delta1: REF Delta1 = bm.delta1;
delta2: REF Delta2 = bm.delta2;
pattern: REF TEXT = bm.pattern;
length: CARDINAL = pattern.length;
stopIndexPlusOne: INT ← bufferSize-overlap;
block: Basics.UnsafeBlock ← [
LOOPHOLE[buffer], 0, stopIndexPlusOne - 0];
block points to remaining empty buffer beyond file contents currently there
ReportMatch ignores the count field of block
index: CARDINAL ← length;
bytes: LONG CARDINAL ← 0;
winning: CARDINAL ← 0; -- if # 0, we're in the middle of printing a match, and are willing to scan this many more chars looking for end of line
BEGIN -- for ENABLE purposes
ENABLE ABORTED => IF stream # NIL THEN stream.Close[];
stream ← OpenFile[file];
IF stream =
NIL
THEN {
results.PutF["%g cannot be opened\n", [rope[file]]];
Process.CheckForAbort[]; -- may raise ABORTED caught above
RETURN[FALSE, 0]};
DO
UNTIL index <= block.startIndex
DO
-- get enough characters to look at
got: CARDINAL;
IF stream.EndOf[]
OR abort
OR (abort ← checkAbort[])
THEN
{IF winning # 0 THEN results.PutRope["\n"]; stream.Close[]; RETURN};
IF block.startIndex = stopIndexPlusOne
THEN {
PrincOpsUtils.LongCopy[
to: buffer,
from: buffer + (stopIndexPlusOne-overlap)
/ Basics.charsPerWord,
nwords: overlap/Basics.charsPerWord];
index ← index - (stopIndexPlusOne-overlap);
block.startIndex ← overlap;
stopIndexPlusOne ← bufferSize};
block.count ← stopIndexPlusOne - block.startIndex;
got ← stream.UnsafeGetBlock[block];
block.startIndex ← block.startIndex + got;
bytes ← bytes + got;
IF winning # 0
THEN
[index, winning] ← ReportMatch[results, block, index, winning];
ENDLOOP;
FOR keyIndex:
CARDINAL
DECREASING
IN [0..length)
DO
IF map[buffer[index ← index-1]] # pattern[keyIndex]
THEN {
index ← index + MAX[delta1[map[buffer[index]]], delta2[keyIndex]];
EXIT};
REPEAT
FINISHED => {
-- found a match
matches ← matches + 1;
IF firstMatch
THEN {
firstMatch ← FALSE;
results.PutF["%g\n", [rope[file]]]};
[index, winning] ← ReportMatch[
results, block, index --+length-1--, 0,
bytes+index --+length-1-- -block.startIndex];
index ← (IF abort ← checkAbort[] THEN block.startIndex ELSE index) + 2};
ENDLOOP;
ENDLOOP;
END; -- of nested ABORTED catchphrase
EXITS aborted => {RETURN [TRUE, matches]};
};
ReportMatch:
PROC [
report: STREAM, block: Basics.UnsafeBlock,
at, limit: INT, total: INT ← 0]
RETURNS [lastUsed, waitingForCR: CARDINAL] = TRUSTED {
extend: INT = IF limit = 0 THEN overlap ELSE limit;
backTo: INT = MAX[at, extend] - extend;
forwardTo: INT = MIN[at+extend, block.startIndex];
IF limit = 0
THEN {
-- initial match as opposed to later search for closing CR
report.PutF["%5g: ", [cardinal[total]]];
FOR cr:
INT
DECREASING
IN [backTo..at)
DO
IF block.base[cr] = ('\n-0C)
THEN {
report.UnsafePutBlock[[block.base, cr+1, at - cr-1]];
EXIT};
REPEAT
FINISHED => {
IF total > extend THEN report.PutRope["..."];
report.UnsafePutBlock[[block.base, backTo, at - backTo]]};
ENDLOOP};
FOR cr:
INT
IN [at..forwardTo)
DO
IF block.base[cr] = ('\n-0C)
THEN {
report.UnsafePutBlock[[block.base, at, cr+1 - at]];
RETURN [cr, 0]};
ENDLOOP;
report.UnsafePutBlock[[block.base, at, forwardTo - at]];
IF (waitingForCR ← at+extend - forwardTo) = 0 THEN report.PutRope["...\n"];
lastUsed ← forwardTo - 1};
---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
Command Line Registration
---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
Commander.Register[
"QFind", SearchListOfFiles,
"Search list of files for occurrances of a string token
usage: QFind [-c] <search string (possibly quoted)> <list of files>"];
}.