WordCountImpl.mesa
Copyright © 1988, 1992 by Xerox Corporation. All rights reserved.
Wes Irish, January 12, 1989 4:26:53 pm PST
Jules Bloomenthal July 7, 1992 12:32 pm PDT
Willie-s, November 13, 1991 10:49 am PST
DIRECTORY
Commander, CommanderOps, FS, IO, Rope, WordCount;
WordCount is very similar to the UNIX command wc. Counts the number of lines, words, and characters in a file (or stream, or rope, or selection).
WordCountImpl: CEDAR MONITOR
IMPORTS Commander, CommanderOps, FS, IO, Rope, WordCount
EXPORTS WordCount = {
TYPEs...
-- Local shorthand for the rope type defined by the Rope interface.
ROPE: TYPE = Rope.ROPE;
-- Local shorthand for the stream type defined by the IO interface.
STREAM: TYPE = IO.STREAM;
-- A list of ropes; WcCommand uses it to accumulate the names of the files to count.
RopeList: TYPE = LIST OF ROPE;
PROCs...
CountStream: PUBLIC PROC [s: STREAM, tokenMode: BOOL ¬ FALSE] RETURNS [lines, words, characters: INT ¬ 0] ~ {
  -- Reads s one character at a time until IO.EndOfStream and returns the number of lines, words and characters seen. The stream is closed before returning.
  -- tokenMode = FALSE: a word is a maximal run of characters that are neither control characters, space, nor DEL.
  -- tokenMode = TRUE: a word starts at a letter or digit and continues over letters, digits and apostrophes.
  -- A line ending is a CR, an LF, or a CR immediately followed by an LF (counted once). A final line that is not terminated by CR or LF still counts as a line.
  inWord: BOOL ¬ FALSE;
  c: CHAR ¬ '\000; -- most recently read character; after the loop it is the last character of the stream
  prev: CHAR ¬ '\000; -- character read just before c
  IsAlphaNumeric: PROC [ch: CHAR] RETURNS [BOOL] ~ {
    RETURN[ch IN ['a..'z] OR ch IN ['A..'Z] OR ch IN ['0..'9]];
    };
  {
    ENABLE
      IO.EndOfStream => GOTO EOS;
    DO
      prev ¬ c;
      c ¬ IO.GetChar[s];
      characters ¬ characters + 1;
      SELECT c FROM
        '\r => {
          lines ¬ lines + 1;
          inWord ¬ FALSE;
          };
        '\l => {
          -- an LF directly after a CR belongs to the line ending already counted for that CR
          IF prev # '\r THEN lines ¬ lines + 1;
          inWord ¬ FALSE;
          };
        <= ' , '\177 => {
          -- remaining control characters, space, and DEL all separate words
          inWord ¬ FALSE;
          };
        ENDCASE => {
          -- note that apostrophes are not considered to start words, but they may be included
          IF inWord
            THEN { -- see if we're still in the word
              IF tokenMode AND NOT (IsAlphaNumeric[c] OR c = '') THEN inWord ¬ FALSE;
              }
            ELSE { -- see if we should start a new word
              IF NOT tokenMode OR IsAlphaNumeric[c] THEN {
                words ¬ words + 1;
                inWord ¬ TRUE;
                };
              };
          };
      ENDLOOP;
    EXITS
      EOS => NULL;
    };
  -- count a final line not terminated by CR or LF
  IF characters > 0 AND NOT (c = '\r OR c = '\l) THEN lines ¬ lines + 1;
  s.Close[];
  RETURN[lines, words, characters];
  };
CountRope: PUBLIC PROC [r: ROPE, tokenMode: BOOL ¬ FALSE] RETURNS [lines, words, characters: INT ¬ 0] ~ {
  -- Counts the lines, words and characters of rope r by wrapping it in an input stream and handing that stream to CountStream.
  ropeStream: STREAM = IO.RIS[rope: r, oldStream: NIL];
  [lines, words, characters] ¬ CountStream[s: ropeStream, tokenMode: tokenMode];
  };
CountFile: PUBLIC PROC [file: ROPE, tokenMode: BOOL ¬ FALSE] RETURNS [lines, words, characters: INT ¬ 0]~ {
  -- Opens the named file for reading and counts its contents with CountStream.
  -- If FS.StreamOpen raises FS.Error, all three results are set to -1 instead.
  opened: BOOL ¬ TRUE;
  fileStream: STREAM ¬ NIL;
  fileStream ¬ FS.StreamOpen[file, $read
    ! FS.Error => {opened ¬ FALSE; CONTINUE} ];
  IF opened
    THEN [lines, words, characters] ¬ CountStream[fileStream, tokenMode]
    ELSE lines ¬ words ¬ characters ¬ -1;
  };
ReplaceAll: PROC [rope, old, new: ROPE, case: BOOL ¬ TRUE, pos: INT ¬ 0] RETURNS [ROPE] = {
  -- Replace all occurrences of "old" with "new" in "rope" starting at "pos". Do the match for "old" with "case".
  -- Returns the resulting rope. Text inserted by a replacement is never rescanned, because the search resumes just past it.
  lenOld: INT = Rope.Length[old];
  lenNew: INT = Rope.Length[new];
  -- An empty pattern gives nothing to replace. Without this guard the loop below is not guaranteed to terminate, since a zero-length match never consumes any of the rope.
  IF lenOld = 0 THEN RETURN[rope];
  DO
    foundIndex: INT = Rope.Find[rope, old, pos, case];
    IF foundIndex = -1 THEN EXIT;
    rope ¬ Rope.Replace[rope, foundIndex, lenOld, new];
    pos ¬ foundIndex + lenNew; -- skip over the text just inserted
    ENDLOOP;
  RETURN[rope];
  };
WcCommand: Commander.CommandProc ~ {
  -- Commander entry point for the word count command.
  -- Parses the command line into option switches and file name patterns, expands each pattern into file names, and then prints counts either for the current selection (s switch) or for every matching file.
  -- On a usage error it writes a diagnostic to cmd.out and returns $Failure with the usage rope as msg.
  argv: CommanderOps.ArgumentVector ¬ CommanderOps.Parse[cmd];
  ts: STREAM ¬ cmd.out;
  showLines, showWords, showCharacters: BOOL ¬ FALSE;
  add: BOOL ¬ FALSE; -- a switch: print only the grand total
  tokenMode: BOOL ¬ FALSE; -- t switch: passed through to the counting procedures
  includeFileNames: BOOL ¬ TRUE; -- cleared by the f switch
  currentSelection: BOOL ¬ FALSE; -- s switch
  nPatterns: INT ¬ 0; -- number of file names collected so far (one per file matched by a pattern)
  fileList, fileListTail: RopeList ¬ NIL;

  UsageMsg: PROC RETURNS [ROPE] ~ {
    -- The usage rope with the "toolname" placeholder replaced by the name the command was invoked under.
    RETURN[ReplaceAll[usageRope, "toolname", argv[0]]];
    };

  PutInfo: PROC [lines, words, characters: INT, label: ROPE] ~ {
    -- Writes one output row containing only the columns that were asked for.
    IF showLines THEN ts.PutF1[" %7g", [integer[lines]]];
    IF showWords THEN ts.PutF1[" %7g", [integer[words]]];
    IF showCharacters THEN ts.PutF1[" %7g", [integer[characters]]];
    IF includeFileNames THEN ts.PutF1[" %g", [rope[label]]];
    ts.PutChar['\n];
    };

  AddOneFileName: FS.NameProc ~ {
    -- Enumeration callback: appends fullFName to the end of fileList.
    IF fileList = NIL
      THEN fileListTail ¬ fileList ¬ LIST[fullFName]
      ELSE fileListTail ¬ fileListTail.rest ¬ LIST[fullFName];
    nPatterns ¬ nPatterns.SUCC;
    RETURN[TRUE];
    };

  ProcessAllFiles: PROC [files: RopeList] ~ {
    -- Counts every file in the list and prints a total row when appropriate.
    filesProcessed: INT ¬ 0;
    lines, words, characters: INT ¬ 0;
    totalLines, totalWords, totalCharacters: INT ¬ 0;
    FOR each: RopeList ¬ files, each.rest UNTIL each = NIL DO
      [lines, words, characters] ¬ CountThisFile[each.first];
      -- CountFile reports a file it could not open as negative counts; such a file must not be folded into the totals
      IF characters >= 0 THEN {
        totalLines ¬ totalLines + lines;
        totalWords ¬ totalWords + words;
        totalCharacters ¬ totalCharacters + characters;
        filesProcessed ¬ filesProcessed + 1;
        };
      ENDLOOP;
    IF add OR filesProcessed >= 2 THEN PutInfo[totalLines, totalWords, totalCharacters, IO.PutFR["Total in %g %g.", [integer[filesProcessed]], [rope[IF filesProcessed = 1 THEN "file" ELSE "files"]]]];
    };

  CountThisFile: PROC [file: ROPE] RETURNS [lines, words, characters: INT ¬ 0]~ {
    -- Counts one file. Prints its row unless only the total was requested, or a diagnostic if the file could not be opened.
    [lines, words, characters] ¬ CountFile[file, tokenMode];
    IF characters < 0
      THEN ts.PutF1["could not open file: \"%g\"\n", [rope[file]]]
      ELSE IF NOT add THEN PutInfo[lines, words, characters, file];
    };

  CountTheSelection: PROC ~ {
    -- Counts the current selection and prints its row.
    lines, words, characters: INT ¬ 0;
    [lines, words, characters] ¬ WordCount.CountSelection[tokenMode];
    PutInfo[lines, words, characters, "CurrentSelection"];
    };

  -- Pass 1: walk the arguments (argv[0] is the command name), collecting switches and expanding file patterns.
  FOR argIndex: NAT IN [1..argv.argc) DO
    arg: ROPE ¬ argv[argIndex];
    SELECT TRUE FROM
      Rope.Match[pattern: "-*", object: arg, case: TRUE] => {
        -- a switch group such as -lw: examine every character after the dash
        FOR optIndex: INT IN [1..Rope.Length[arg]) DO
          c: CHAR ¬ Rope.Fetch[base: arg, index: optIndex];
          SELECT c FROM
            'l => showLines ¬ TRUE;
            'w => showWords ¬ TRUE;
            'c => showCharacters ¬ TRUE;
            'f => includeFileNames ¬ FALSE;
            's => currentSelection ¬ TRUE;
            'a => add ¬ TRUE;
            't => tokenMode ¬ TRUE;
            ENDCASE => {
              ts.PutF1["unknown option: \"%g\"\n", [character[c]]];
              RETURN[result: $Failure, msg: UsageMsg[]];
              };
          ENDLOOP;
        };
      ENDCASE => {
        -- a file name pattern: when no version part ("!") is given, "!h" is appended before enumerating
        before: INT ¬ nPatterns;
        IF Rope.Find[arg, "!"] < 0 THEN arg ¬ Rope.Concat[arg, "!h"];
        FS.EnumerateForNames[arg, AddOneFileName];
        IF before = nPatterns THEN {
          ts.PutF1["pattern does not match any files: \"%g\"\n", [rope[arg]]];
          RETURN[result: $Failure, msg: UsageMsg[]];
          };
        };
    ENDLOOP;

  -- Pass 2: validate the combination of arguments.
  IF nPatterns = 0 AND NOT currentSelection THEN {
    ts.PutRope["nothing to count\n"];
    RETURN[result: $Failure, msg: UsageMsg[]];
    };
  IF currentSelection AND nPatterns > 0 THEN {
    ts.PutRope["can't specify current selection and file(s)\n"];
    RETURN[result: $Failure, msg: UsageMsg[]];
    };
  -- with no column switch at all, show all three columns
  IF NOT (showLines OR showWords OR showCharacters)
    THEN showLines ¬ showWords ¬ showCharacters ¬ TRUE;

  -- Pass 3: do the counting.
  IF currentSelection
    THEN CountTheSelection[]
    ELSE ProcessAllFiles[fileList];
  };
-- One-line usage summary. The placeholder "toolname" is replaced by the real command name with ReplaceAll before the text is shown or registered.
usageRope: ROPE ¬ "usage: toolname [-l] [-w] [-c] [-s] [-f] [-a] [-t] [file1] [file2] ... [filen]";
-- Long-form help text passed to Commander.Register together with usageRope. It uses the same "toolname" placeholder. The embedded line breaks are part of the text.
docRope: ROPE ¬ "toolname counts lines, words, and characters in the specified file(s), or in the current selection if the -s option is used. It also reports a total count if two or more files are counted or if the -a option is used specifying that the individual counts should be added together and only the total given.

toolname normally defines a word to be a string of characters delimited by white-space; that is spaces, tabs, newlines, or other control characters. The -t option defines words to be similar to cedar tokens.

Tioga documents are seen as \"simple\" documents (comments and formatting evaporate).

Lines are defined to be the number of CR, LF, CR/LF pairs encountered. If the last line is not terminated by a CR and/or LF it is counted as a line none the less.

toolname is very similar to the UNIX command wc.

OPTIONS
l Count lines.
w Count words.
c Count characters.
s Count the current selection. (Useful for a Commander button.)
f Don't include file names in output message.
a Individual counts should be added together and only the total given.
t Define word breaks to be similar to cedar tokens rather than white-space.

The default is -lwc (count lines, words, and characters).

When files are specified on the command line, their names
will be printed along with the counts (unless you use the -f switch).

EXAMPLE
% toolname *.mesa
482 2093 15353 []<>Users>Test.mesa!1
181 719 5164 []<>Users>Test2.mesa!15
663 2812 20517 Total in 2 files.
%
";
-- Module start code: register WcCommand under the name "WordCount". The documentation is the usage line, a blank line, and the long help text, with every "toolname" placeholder replaced by "WordCount".
Commander.Register[
  key: "WordCount",
  proc: WcCommand,
  doc: ReplaceAll[rope: Rope.Cat[usageRope, "\n\n", docRope], old: "toolname", new: "WordCount"]
  ];
}.