GrepImpl.mesa
Copyright © 1985 by Xerox Corporation. All rights reserved.
Russ Atkinson (RRA) June 1, 1985 10:03:13 am PDT
Donahue, July 16, 1985 8:29:50 am PDT
John Larson, September 22, 1985 9:57:21 pm PDT
Peter Kessler November 7, 1985 11:35:09 am PST
Tim Diebert: December 31, 1985 8:59:11 am PST
Bob Hagmann February 6, 1986 2:16:28 pm PST
DIRECTORY
Ascii USING [Upper],
Commander USING [CommandProc, Register],
CommandTool USING [ParseToList],
FS USING [defaultStreamOptions, EnumerateForNames, Error, NameProc, StreamOpen, StreamOptions],
Grep,
IO USING [Close, EndOfStream, GetLine, PutBlock, PutChar, PutF, PutF1, PutRope, PutText, STREAM],
RefText USING [TrustTextAsRope],
Rope USING [Concat, Equal, Fetch, Length, ROPE],
RegularExpression USING [CreateFromRope, Finder, MalformedPattern, SearchRope],
RuntimeError USING [BoundsFault];
GrepImpl: CEDAR PROGRAM
IMPORTS Ascii, Commander, CommandTool, FS, IO, RefText, Rope, RegularExpression, RuntimeError
EXPORTS Grep = {
-- Local abbreviations for the rope and stream types used throughout this module.
ROPE: TYPE = Rope.ROPE;
STREAM: TYPE = IO.STREAM;

-- File name extensions that GrepCmd treats as binary files.
-- Such files are skipped unless the -binaryFilesToo switch is given.
binaryFileExtensions: LIST OF ROPE ← LIST[".bcd", ".press", ".ip", ".interpress", ".symbols", ".tipc", ".boot", ".versionmap", ".bittable"];
GrepCmd: Commander.CommandProc = {
  -- Callable from the command interpreter.
  -- Expects optional switches, then a pattern, then a list of file name patterns.
  -- Every line of every named file is searched for the pattern.
  -- If no file names are given, the command's input stream is searched instead.
  -- Matches are written to cmd.out; diagnostics and the final summary go to cmd.err.
  stdout: STREAM ← cmd.out;
  stdin: STREAM ← cmd.in;
  stderr: STREAM ← cmd.err;
  cmdLine: LIST OF ROPE;
  totalNumberOfHits, totalFileHits, totalFilesExamined: INT ← 0;
  pattern: RegularExpression.Finder;
  -- Shared stop flag: set by GrepFile on an open failure when -stopOnFirstError was given.
  interrupt: REF BOOL ← NEW[BOOL ← FALSE];
  openOptions: FS.StreamOptions ← FS.defaultStreamOptions;
  stopOnFirstError: BOOL;
  ignoreTiogaFormatting: BOOL ← FALSE;
  binaryFilesToo: BOOL;
  -- Interesting pattern options.
  literal, word, ignoreCase: BOOL;
  -- Display options.
  switches: Grep.SwitchSettings;
  regularOutput: BOOL;

  UsageMessage: PROC [] = {
    -- Print a one line reminder of the command syntax.
    IO.PutRope[stderr, "Usage: GREP [switches] <pattern> <fileNames>\n"];
    };

  HasBinaryExtension: PROC [name: ROPE] RETURNS [BOOL] = {
    -- TRUE if name, ignoring any trailing "!version" part, ends with one of
    -- binaryFileExtensions. The comparison ignores case.
    end: INT ← Rope.Length[name];
    pos: INT ← end;
    -- Find where the version part begins, without scanning past the directory part.
    WHILE pos > 0 DO
      pos ← pos - 1;
      SELECT Rope.Fetch[name, pos] FROM
        '! => {end ← pos; EXIT};
        '>, '], '/ => EXIT;
        ENDCASE;
      ENDLOOP;
    FOR l: LIST OF ROPE ← binaryFileExtensions, l.rest UNTIL l = NIL DO
      ext: ROPE = l.first;
      extLen: INT = Rope.Length[ext];
      IF extLen <= end THEN {
        base: INT = end - extLen;
        same: BOOL ← TRUE;
        FOR i: INT IN [0..extLen) WHILE same DO
          same ← Ascii.Upper[Rope.Fetch[name, base+i]] = Ascii.Upper[Rope.Fetch[ext, i]];
          ENDLOOP;
        IF same THEN RETURN [TRUE];
        };
      ENDLOOP;
    RETURN [FALSE];
    };

  GrepFile: FS.NameProc = {
    -- Called once for each file name produced by the enumeration.
    -- Opens the file, searches it, and accumulates the totals.
    -- Returns continue = FALSE once the interrupt flag has been set.
    file: STREAM;
    hits: INT;
    continue ← TRUE;
    -- The original test compared the whole file name against each extension,
    -- which can never succeed; test the end of the name instead.
    IF ~binaryFilesToo AND HasBinaryExtension[fullFName] THEN RETURN;
    file ← FS.StreamOpen[fullFName, read, openOptions
      ! FS.Error => IF error.group # bug THEN {
        IO.PutF1[stderr, "** %g\n", [rope[error.explanation]]];
        GOTO openFailed;
        }];
    IF switches[verbose] THEN IO.PutF1[stderr, "Searching \"%g\"...", [rope[fullFName]]];
    -- Make sure the file is closed even if the search is unwound by an error.
    hits ← GrepStream[pattern: pattern, inStream: file, fileName: fullFName, outStream: stdout, switches: switches, interrupt: interrupt
      ! UNWIND => IO.Close[file]];
    totalNumberOfHits ← totalNumberOfHits + hits;
    IF hits > 0 THEN totalFileHits ← totalFileHits + 1;
    totalFilesExamined ← totalFilesExamined + 1;
    IO.Close[file];
    continue ← ~interrupt^;
    EXITS
      openFailed => {
        -- An open failure stops the whole run only when -stopOnFirstError was given.
        interrupt^ ← interrupt^ OR stopOnFirstError;
        continue ← ~interrupt^;
        };
    };

  cmdLine ← CommandTool.ParseToList[cmd].list;

  -- The pattern is literal unless -pattern is present.
  [literal, cmdLine] ← GetSwitch["-pattern", cmdLine];
  literal ← ~literal;
  -- Case is ignored unless -caseSensitive is present.
  [ignoreCase, cmdLine] ← GetSwitch["-caseSensitive", cmdLine];
  ignoreCase ← ~ignoreCase;
  [word, cmdLine] ← GetSwitch["-word", cmdLine];
  [stopOnFirstError, cmdLine] ← GetSwitch["-stopOnFirstError", cmdLine];
  [binaryFilesToo, cmdLine] ← GetSwitch["-binaryFilesToo", cmdLine];
  [ignoreTiogaFormatting, cmdLine] ← GetSwitch["-ignoreTiogaFormatting", cmdLine];
  [switches, cmdLine] ← GetSwitches[cmdLine];

  -- Anything that still looks like a switch was not recognized.
  IF SwitchesLeft[cmdLine, stderr] THEN GOTO prematureExit;
  IF cmdLine = NIL THEN {
    UsageMessage[];
    GOTO prematureExit;
    };
  regularOutput ← RegularOutput[switches];

  pattern ← RegularExpression.CreateFromRope[pattern: cmdLine.first, literal: literal, word: word, ignoreCase: ignoreCase
    ! RegularExpression.MalformedPattern => {
      IO.PutF1[stderr, "Syntax error in pattern \"%g\"\n", [rope[cmdLine.first]]];
      GOTO prematureExit}];
  cmdLine ← cmdLine.rest; -- Get rid of the pattern.

  openOptions[tiogaRead] ← ignoreTiogaFormatting;

  IF cmdLine = NIL
    THEN totalNumberOfHits ← GrepStream[pattern: pattern, inStream: stdin, fileName: "Standard input", outStream: stdout, interrupt: interrupt, switches: switches]
    ELSE
      FOR l: LIST OF ROPE ← cmdLine, l.rest UNTIL l = NIL DO
        currentFiles: INT ← totalFilesExamined;
        filePattern: ROPE ← DefaultToHighestGeneration[l.first];
        FS.EnumerateForNames[filePattern, GrepFile
          ! FS.Error => IF error.group # bug THEN {
            IO.PutF1[stderr, "** %g\n", [rope[error.explanation]] ];
            LOOP};
          ];
        IF interrupt^ THEN GOTO prematureExit;
        IF currentFiles = totalFilesExamined THEN
          IO.PutF1[stderr, "** No files examined for '%g'\n", [rope[filePattern]] ];
        ENDLOOP;

  IF regularOutput OR switches[verbose] THEN
    IO.PutF[stderr, "Files examined: %g, files matched: %g, number of matches: %g\n",
      [integer[totalFilesExamined]], [integer[totalFileHits]], [integer[totalNumberOfHits]]];
  EXITS
    prematureExit => NULL;
  };
GetSwitches: PUBLIC PROC [cmdLine: LIST OF Rope.ROPE, prefixLen: INT ← 2] RETURNS [switches: Grep.SwitchSettings, remainder: LIST OF Rope.ROPE] = {
  -- Extract the display switches from cmdLine.
  -- Each entry of switches named below is set to TRUE exactly when the matching
  -- switch was found; remainder is the command line with those switches removed.
  -- prefixLen is passed through to GetSwitch unchanged.
  seen: BOOL;
  remainder ← cmdLine;
  [seen, remainder] ← GetSwitch["-oncePerLine", remainder, prefixLen];
  switches[oncePerLine] ← seen;
  [seen, remainder] ← GetSwitch["-fileNamesOnly", remainder, prefixLen];
  switches[fileNamesOnly] ← seen;
  [seen, remainder] ← GetSwitch["-textOnly", remainder, prefixLen];
  switches[textOnly] ← seen;
  [seen, remainder] ← GetSwitch["-positionsOnly", remainder, prefixLen];
  switches[positionsOnly] ← seen;
  [seen, remainder] ← GetSwitch["-verbose", remainder, prefixLen];
  switches[verbose] ← seen;
  };
GetSwitch: PROC [switch: ROPE, cmdLine: LIST OF ROPE, prefixLen: INT ← 2] RETURNS [present: BOOL ← FALSE, remainder: LIST OF ROPE] = {
  -- Look for the first argument in cmdLine that is an abbreviation of switch,
  -- as decided by Prefix with prefixLen.
  -- present tells whether one was found; remainder is cmdLine without it.
  -- NOTE: when the match is not the first element, it is spliced out of the
  -- caller's list in place.
  IF cmdLine = NIL THEN RETURN;
  IF Prefix[switch, cmdLine.first, prefixLen] THEN RETURN [TRUE, cmdLine.rest];
  remainder ← cmdLine;
  -- Look one element ahead so the matching cell can be unlinked from its predecessor.
  FOR l: LIST OF ROPE ← cmdLine, l.rest UNTIL l.rest = NIL DO
    IF Prefix[switch, l.rest.first, prefixLen] THEN {
      l.rest ← l.rest.rest;
      present ← TRUE;
      RETURN
      };
    ENDLOOP;
  };
SwitchesLeft: PROC [cmdLine: LIST OF ROPE, stderr: STREAM, switchChar: CHAR ← '-] RETURNS [switchesLeft: BOOL ← FALSE] = {
  -- Report every argument that begins with switchChar as an invalid switch.
  -- One message per offending argument is written to stderr.
  -- Returns TRUE if at least one such argument was found.
  FOR l: LIST OF ROPE ← cmdLine, l.rest UNTIL l = NIL DO
    arg: ROPE = l.first;
    -- The length test guards the fetch against empty arguments.
    IF Rope.Length[arg] >= 1 AND Rope.Fetch[arg, 0] = switchChar THEN {
      IO.PutF1[stderr, "Invalid switch: \"%g\"\n", [rope[arg]]];
      switchesLeft ← TRUE;
      };
    ENDLOOP;
  };
Prefix: PROC [r1, r2: ROPE, length: INT, ignoreCase: BOOL ← TRUE] RETURNS [BOOL] = {
  -- TRUE if r2 is a prefix of r1 that is at least length characters long.
  -- r1 is the full name and r2 the candidate abbreviation.
  -- Raises ERROR if r1 itself is shorter than length.
  -- Characters are compared without regard to case when ignoreCase is TRUE.
  r1Len: INT = Rope.Length[r1];
  r2Len: INT = Rope.Length[r2];
  IF r1Len < length THEN ERROR;
  IF r2Len > r1Len OR r2Len < length THEN RETURN [FALSE];
  FOR i: INT IN [0..r2Len) DO
    c1: CHAR ← Rope.Fetch[r1, i];
    c2: CHAR ← Rope.Fetch[r2, i];
    IF ignoreCase THEN {c1 ← Ascii.Upper[c1]; c2 ← Ascii.Upper[c2]};
    IF c1 # c2 THEN RETURN [FALSE];
    ENDLOOP;
  RETURN [TRUE];
  };
DefaultToHighestGeneration: PROC [filePattern: ROPE] RETURNS [ROPE] = {
  -- Supply defaults for the trailing parts of a file name pattern.
  -- A pattern that already contains '! after its last '. is returned unchanged.
  -- Otherwise "!h" is appended, preceded by ".mesa" when the final name
  -- component contains neither '. nor '*.
  sawBang, sawStar, sawDot: BOOL ← FALSE;
  -- Scan backwards, stopping at the last '. or at the end of the directory part.
  FOR i: INT DECREASING IN [0..Rope.Length[filePattern]) DO
    SELECT Rope.Fetch[filePattern, i] FROM
      '! => sawBang ← TRUE;
      '* => sawStar ← TRUE;
      '. => {sawDot ← TRUE; EXIT};
      '>, '] => EXIT;
      ENDCASE;
    ENDLOOP;
  IF sawBang THEN RETURN [filePattern];
  IF sawDot OR sawStar THEN RETURN [Rope.Concat[filePattern, "!h"]];
  RETURN [Rope.Concat[filePattern, ".mesa!h"]];
  };
RegularOutput: PUBLIC PROC [switches: Grep.SwitchSettings] RETURNS [yes: BOOL] = {
  -- TRUE when none of the switches that select another output format
  -- (fileNamesOnly, textOnly, positionsOnly, verbose) is set.
  RETURN [~switches[fileNamesOnly] AND ~switches[textOnly] AND ~switches[positionsOnly] AND ~switches[verbose]];
  };
GrepStream: PUBLIC PROC [pattern: RegularExpression.Finder, inStream, outStream: IO.STREAM, switches: Grep.SwitchSettings, fileName: Rope.ROPE, interrupt: REF BOOL] RETURNS [numberOfHits: INT] = {
  -- Read inStream a line at a time and report every match of pattern on outStream.
  -- switches selects the report format; fileName is used only for labelling output.
  -- interrupt is handed to the pattern matcher unchanged.
  -- Returns the number of matches reported. With fileNamesOnly the search stops
  -- after the first match, so the result is then at most 1.
  excerptLen: INT = 50; -- longest piece of a line shown in the regular format
  leadContext: INT = 20; -- characters shown ahead of the match in an excerpt
  found: BOOL;
  lineTooLongError: BOOL ← FALSE;
  at, atEnd, before, after: INT;
  regularOutput: BOOL = RegularOutput[switches];
  position: INT ← 0; -- stream position of the first character of the current line
  start: INT; -- where the next search within the current line begins
  line: REF TEXT ← NEW[TEXT[200]];
  numberOfHits ← 0;
  DO
    -- Match the pattern against the next line.
    line ← IO.GetLine[inStream, line !
      IO.EndOfStream => EXIT;
      RuntimeError.BoundsFault => {
        -- The complaint is made only once for each stream.
        IF ~lineTooLongError THEN IO.PutF[outStream, "Line too long in %g (%g). Line skipped.\n", [rope[fileName]], [integer[position]]];
        lineTooLongError ← TRUE;
        LOOP;
        };
      ];
    start ← 0;
    DO
      -- Keep matching the pattern until it fails.
      [found, at, atEnd, before, after] ←
        RegularExpression.SearchRope[pattern, RefText.TrustTextAsRope[line], start, line.length, interrupt];
      IF ~found THEN EXIT;
      IF switches[verbose] AND numberOfHits = 0 THEN
        IO.PutRope[outStream, "\n"];
      numberOfHits ← numberOfHits + 1;
      SELECT TRUE FROM
        switches[fileNamesOnly] => {
          IO.PutF1[outStream, "%g\n", [rope[fileName]]];
          GOTO prematureExit;
          };
        switches[textOnly] =>
          IO.PutF1[outStream, "%g\n", [text[line]]];
        switches[positionsOnly] =>
          IO.PutF[outStream, "%g (%g)\n", [rope[fileName]], [integer[position+at]]];
        regularOutput => {
          -- The file name is printed once, ahead of the first match.
          IF numberOfHits = 1 THEN
            IO.PutF1[outStream, "%g\n", [rope[fileName]]];
          IO.PutF1[outStream, " (%g): ", [integer[position+at]]];
          IF line.length > excerptLen
            THEN {
              -- Long line: show an excerpt around the match, with dots for what is cut.
              first: INT ← MAX[0, at-leadContext];
              last: INT ← MIN[line.length, first+excerptLen];
              IF first > 0 THEN IO.PutRope[outStream, "..."];
              IO.PutBlock[self: outStream, block: line, startIndex: first, count: last-first];
              IF last < line.length THEN IO.PutRope[outStream, "..."];
              }
            ELSE
              IO.PutText[outStream, line];
          IO.PutChar[outStream, '\n];
          };
        switches[verbose] => IO.PutF[outStream, "%g (%g): %g\n", [rope[fileName]], [integer[position+at]], [text[line]]];
        ENDCASE => ERROR;
      -- Always move forward. A match of zero length would otherwise be found
      -- again at the same place forever.
      start ← IF atEnd > at THEN atEnd ELSE at + 1;
      IF switches[oncePerLine] OR start > line.length THEN EXIT;
      ENDLOOP;
    -- The extra 1 accounts for the line terminator that GetLine does not return.
    position ← position + line.length + 1;
    REPEAT
      prematureExit => NULL;
    ENDLOOP;
  IF switches[verbose] THEN
    IF numberOfHits = 0 THEN IO.PutRope[outStream, "no matches found.\n"]
    ELSE IO.PutF1[outStream, "Matched %g times.\n", [integer[numberOfHits]]];
  };
-- Module start code: register GrepCmd with the Commander under two names.
-- Both entries run the same procedure and differ only in the interpreted flag.
-- NOTE(review): presumably interpreted controls whether the Commander processes
-- the command line before calling the procedure; confirm against Commander.
Commander.Register[
key: "Grep",
proc: GrepCmd,
doc: "Searches files for lines that match a pattern (uninterpreted)",
interpreted: FALSE];
Commander.Register[
key: "GrepI",
proc: GrepCmd,
doc: "Searches files for lines that match a pattern (interpreted)",
interpreted: TRUE];
}.
Bob Hagmann February 6, 1986 2:16:28 pm PST
Added check in GrepStream to avoid bounds errors for very long lines
changes to: GrepStream, DIRECTORY, GrepImpl