QFind.mesa
Copyright © 1985 by Xerox Corporation. All rights reserved.
Sweet December 5, 1985 10:52:28 am PST
DIRECTORY
Basics USING [charsPerWord, UnsafeBlock],
BasicTime USING [GetClockPulses, Pulses, PulsesToSeconds],
Commander USING [CommandProc, Register],
CommandTool USING [Failed, ParseToList],
FS USING [EnumerateForNames, Error, NameProc, StreamOpen],
IO USING [Close, EndOf, PutF, PutFR, PutRope, STREAM, UnsafeGetBlock, UnsafePutBlock],
PrincOpsUtils USING [LongCopy],
Process USING [CheckForAbort],
Rope USING [Concat, Fetch, Length, ROPE, ToRefText],
VM USING [AddressForPageNumber, Allocate, bytesPerPage, Free, Interval];
QFind: CEDAR PROGRAM
IMPORTS BasicTime, Commander, CommandTool, FS, IO, PrincOpsUtils, Process, Rope, VM = {
STREAM: TYPE = IO.STREAM;
ROPE: TYPE = Rope.ROPE;
overlap: CARDINAL = 150; -- keep this many chars from previous buffer so we can try to back up to beginning of line when we find a match (must be even)
bufferPages: NAT = 40;
bufferSize: CARDINAL = bufferPages*VM.bytesPerPage;
Buffer: TYPE = RECORD [PACKED SEQUENCE COMPUTED CARDINAL OF CHAR];
Map: TYPE = PACKED ARRAY CHAR OF CHAR;
OpenFile: PROC [name: ROPE] RETURNS [st: STREAM] = {
st ← FS.StreamOpen[name, $read
! FS.Error => IF error.group # bug THEN CONTINUE]};
SearchListOfFiles: Commander.CommandProc = {
[cmd: Commander.Handle] RETURNS [result: REF ANY ← NIL, msg: ROPE ← NIL]
log: IO.STREAM ← cmd.err;
out: IO.STREAM ← cmd.out;
inputList: LIST OF ROPE;
buffer: LONG POINTER TO Buffer;
vmInt: VM.Interval;
key, file: ROPE;
bm: BMhandle;
abort: BOOL;
matches, files: INT ← 0;
startTime: BasicTime.Pulses ← BasicTime.GetClockPulses[];
map: REF Map;
caseless: BOOLTRUE;
tkey: REF TEXT;
SearchFile: FS.NameProc = {
fMatches: INT;
[abort: abort, matches: fMatches] ← BoyerMooreSearch[file: fullFName, bm: bm, buffer: buffer, results: out, map: map];
files ← files + 1;
matches ← matches + fMatches;
continue ← ~abort;
};
inputList ← CommandTool.ParseToList[cmd: cmd, starExpand: FALSE, switchChar: '- !
CommandTool.Failed => {log.PutRope["invalid input format\n"]; GO TO quit}].list;
IF inputList = NIL THEN RETURN[NIL, "No input specified"];
map ← NEW [Map];
FOR c: CHAR IN CHAR DO
map[c] ← IF c IN ['a..'z] THEN c - 'a + 'A ELSE c;
ENDLOOP;
DO
key ← inputList.first; inputList ← inputList.rest;
IF Rope.Fetch[key, 0] = '- THEN {
SELECT Rope.Fetch[key, 1] FROM
'c, 'C => FOR c: CHAR IN ['a..'z] DO map[c] ← c ENDLOOP;
ENDCASE => {
log.PutF["invalid switch: $g", [character[Rope.Fetch[key, 1]]]]; GO TO quit};
LOOP};
EXIT;
ENDLOOP;
tkey ← Rope.ToRefText[key];
FOR i: CARDINAL IN [0..tkey.length) DO
tkey[i] ← map[tkey[i]];
ENDLOOP;
bm ← MakeFailureFunctions[tkey];
vmInt ← VM.Allocate[bufferPages];
buffer ← LOOPHOLE[VM.AddressForPageNumber[vmInt.page]];
WHILE inputList # NIL DO
ENABLE
UNWIND => TRUSTED { --give buffer back
VM.Free[vmInt]};
pattern: ROPE ← DefaultToHighestGeneration[inputList.first];
file ← inputList.first; inputList ← inputList.rest;
FS.EnumerateForNames[pattern, SearchFile
! FS.Error => IF error.group # bug THEN {
out.PutF["** %g\n", [rope[error.explanation]] ];
LOOP};
];
IF abort THEN EXIT;
ENDLOOP;
TRUSTED {VM.Free[vmInt]};
IF abort THEN out.PutRope["\nABORTED\n"];
RETURN[
IF abort THEN $aborted ELSE NIL,
IO.PutFR["%g files, %g matches, %g seconds", [integer[files]], [integer[matches]], [real[BasicTime.PulsesToSeconds[BasicTime.GetClockPulses[] - startTime]]]]];
EXITS
quit => RETURN[$Failure, NIL];
};
DefaultToHighestGeneration: PROC [filePattern: ROPE] RETURNS [ROPE] = {
len: INT ← Rope.Length[filePattern];
bang: INT ← len;
star: INT ← len;
dot: INT ← len;
pos: INT ← len;
WHILE pos > 0 DO
c: CHAR ← Rope.Fetch[filePattern, pos ← pos - 1];
SELECT c FROM
'! => bang ← pos;
'. => {dot ← pos; EXIT};
'* => IF star = len THEN star ← pos;
'>, '] => EXIT;
ENDCASE;
ENDLOOP;
SELECT TRUE FROM
dot = len AND star = len AND bang = len =>
filePattern ← Rope.Concat[filePattern, ".mesa!h"];
bang = len =>
filePattern ← Rope.Concat[filePattern, "!h"];
ENDCASE;
RETURN[filePattern];
};
---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
Boyer-Moore parsing: construct tables of offsets for when match fails
---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
Delta1: TYPE = ARRAY CHAR OF CARDINAL;
Delta2: TYPE = RECORD [SEQUENCE COMPUTED CARDINAL OF CARDINAL];
BMhandle: TYPE = REF BMdata;
BMdata: TYPE = RECORD [
pattern: REF TEXT,
delta1: REF Delta1,
delta2: REF Delta2];
MakeFailureFunctions: PROCEDURE [key: REF TEXT]
RETURNS [BMhandle] = {
See Knuth, Morris, Pratt, "Fast Pattern...", SIAM J. Comp, June 1977
1/12/84:RSS Above algorithm (dd') fails for patterns in which all characters are identical. Changed implementation to calculate dd instead of dd' in p342 algorithm.
c: CARDINAL;
length: CARDINAL = key.length;
t: CARDINAL ← length;
delta1: REF Delta1 = NEW[Delta1];
delta2: REF Delta2 = NEW[Delta2[length]];
aux: REF Delta2 ← NEW[Delta2[length]];
FOR ch: CHAR IN CHAR DO delta1[ch] ← length+1 ENDLOOP;
FOR c IN [0..length) DO
delta2[c] ← length + (delta1[key[c]] ← length - c);
ENDLOOP;
FOR c DECREASING IN [0..length) DO
aux[c] ← t;
WHILE t < length AND key[c] # key[t] DO
t ← aux[t];
ENDLOOP;
t ← t - 1;
delta2[t] ← MIN[delta2[t], length - (c-1)];
ENDLOOP;
FOR c IN [0..t] DO delta2[c] ← MIN[delta2[c], length+1+t-c] ENDLOOP;
RETURN [NEW[BMdata ← [pattern: key, delta1: delta1, delta2: delta2]]]};
BoyerMooreSearch: PROC [file: ROPE, bm: BMhandle, buffer: LONG POINTER TO Buffer, results: STREAM, map: REF Map]
RETURNS [abort: BOOLEANFALSE, matches: LONG CARDINAL ← 0] = TRUSTED {
ENABLE ABORTED => GO TO aborted;
stream: STREAM;
checkAbort: PROC RETURNS [a: BOOLFALSE] = TRUSTED {
Process.CheckForAbort[!ABORTED => {a ← TRUE; CONTINUE}]};
firstMatch: BOOLTRUE;
delta1: REF Delta1 = bm.delta1;
delta2: REF Delta2 = bm.delta2;
pattern: REF TEXT = bm.pattern;
length: CARDINAL = pattern.length;
stopIndexPlusOne: INT ← bufferSize-overlap;
block: Basics.UnsafeBlock ← [LOOPHOLE[buffer], 0, stopIndexPlusOne - 0];
block points to remaining empty buffer beyond file contents currently there
ReportMatch ignores the count field of block
index: CARDINAL ← length;
bytes: LONG CARDINAL ← 0;
winning: CARDINAL ← 0; -- if # 0, we're in the middle of printing a match, and are willing to scan this many more chars looking for end of line
BEGIN -- for ENABLE purposes
ENABLE ABORTED => IF stream # NIL THEN stream.Close[];
stream ← OpenFile[file];
IF stream = NIL THEN {
results.PutF["%g cannot be opened\n", [rope[file]]];
Process.CheckForAbort[]; -- may raise ABORTED caught above
RETURN[FALSE, 0]};
DO
UNTIL index <= block.startIndex DO -- get enough characters to look at
got: CARDINAL;
IF stream.EndOf[] OR abort OR (abort ← checkAbort[]) THEN
{IF winning # 0 THEN results.PutRope["\n"]; stream.Close[]; RETURN};
IF block.startIndex = stopIndexPlusOne THEN {
PrincOpsUtils.LongCopy[
to: buffer,
from: buffer + (stopIndexPlusOne-overlap)
/ Basics.charsPerWord,
nwords: overlap/Basics.charsPerWord];
index ← index - (stopIndexPlusOne-overlap);
block.startIndex ← overlap;
stopIndexPlusOne ← bufferSize};
block.count ← stopIndexPlusOne - block.startIndex;
got ← stream.UnsafeGetBlock[block];
block.startIndex ← block.startIndex + got;
bytes ← bytes + got;
IF winning # 0 THEN
[index, winning] ← ReportMatch[results, block, index, winning];
ENDLOOP;
FOR keyIndex: CARDINAL DECREASING IN [0..length) DO
IF map[buffer[index ← index-1]] # pattern[keyIndex] THEN {
index ← index + MAX[delta1[map[buffer[index]]], delta2[keyIndex]];
EXIT};
REPEAT FINISHED => { -- found a match
matches ← matches + 1;
IF firstMatch THEN {
firstMatch ← FALSE;
results.PutF["%g\n", [rope[file]]]};
[index, winning] ← ReportMatch[
results, block, index --+length-1--, 0,
bytes+index --+length-1-- -block.startIndex];
index ← (IF abort ← checkAbort[] THEN block.startIndex ELSE index) + 2};
ENDLOOP;
ENDLOOP;
END; -- of nested ABORTED catchphrase
EXITS aborted => {RETURN [TRUE, matches]};
};
ReportMatch: PROC [
report: STREAM, block: Basics.UnsafeBlock,
at, limit: INT, total: INT ← 0]
RETURNS [lastUsed, waitingForCR: CARDINAL] = TRUSTED {
extend: INT = IF limit = 0 THEN overlap ELSE limit;
backTo: INT = MAX[at, extend] - extend;
forwardTo: INT = MIN[at+extend, block.startIndex];
IF limit = 0 THEN { -- initial match as opposed to later search for closing CR
report.PutF["%5g: ", [cardinal[total]]];
FOR cr: INT DECREASING IN [backTo..at) DO
IF block.base[cr] = ('\n-0C) THEN {
report.UnsafePutBlock[[block.base, cr+1, at - cr-1]];
EXIT};
REPEAT FINISHED => {
IF total > extend THEN report.PutRope["..."];
report.UnsafePutBlock[[block.base, backTo, at - backTo]]};
ENDLOOP};
FOR cr: INT IN [at..forwardTo) DO
IF block.base[cr] = ('\n-0C) THEN {
report.UnsafePutBlock[[block.base, at, cr+1 - at]];
RETURN [cr, 0]};
ENDLOOP;
report.UnsafePutBlock[[block.base, at, forwardTo - at]];
IF (waitingForCR ← at+extend - forwardTo) = 0 THEN report.PutRope["...\n"];
lastUsed ← forwardTo - 1};
---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
Command Line Registration
---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
Commander.Register[
"QFind", SearchListOfFiles,
"Search list of files for occurrances of a string token
usage: QFind [-c] <search string (possibly quoted)> <list of files>"];
}.