-- WordCountImpl.mesa
-- Copyright © 1988, 1992 by Xerox Corporation. All rights reserved.
-- Wes Irish, January 12, 1989 4:26:53 pm PST
-- Jules Bloomenthal July 7, 1992 12:32 pm PDT
-- Willie-s, November 13, 1991 10:49 am PST
WordCountImpl:
CEDAR
MONITOR
IMPORTS Commander, CommanderOps, FS, IO, Rope, WordCount
EXPORTS WordCount = {
-- TYPEs...
-- Short local names for the library types this module works with.
STREAM: TYPE ~ IO.STREAM;	-- character stream that gets counted
ROPE: TYPE ~ Rope.ROPE;	-- immutable text string
RopeList: TYPE ~ LIST OF Rope.ROPE;	-- used by the command for the file names to count
-- PROCs...
CountStream: PUBLIC PROC [s: STREAM, tokenMode: BOOL ¬ FALSE]
	RETURNS [lines, words, characters: INT ¬ 0] ~ {
	-- Reads s until IO.EndOfStream, counting characters, words and lines, then closes s.
	--
	-- characters: every character read from s.
	-- words: with tokenMode = FALSE, a word is a maximal run of characters that are above
	--   space and are not DEL ('\177).  With tokenMode = TRUE, a word starts at a letter or
	--   digit and continues through letters, digits and apostrophes; anything else ends it.
	-- lines: each CR, each LF, and each CR LF pair counts as one end of line.  A final line
	--   that is not terminated by CR or LF is counted as well.
	--
	-- Line ends are paired up character by character (an LF directly after a CR belongs to
	-- that CR), so files that mix CR, LF and CR LF conventions are still counted correctly.
	inWord: BOOL ¬ FALSE;
	afterReturn: BOOL ¬ FALSE;	-- TRUE iff the previously read character was a CR
	c: CHAR;	-- last character read; only meaningful once characters > 0
	{
		ENABLE IO.EndOfStream => GOTO EOS;
		DO
			c ¬ IO.GetChar[s];
			characters ¬ characters.SUCC;
			SELECT c FROM
				'\r => {
					lines ¬ lines.SUCC;
					inWord ¬ FALSE;
					};
				'\l => {
					-- the LF of a CR LF pair was already counted together with its CR
					IF NOT afterReturn THEN lines ¬ lines.SUCC;
					inWord ¬ FALSE;
					};
				<= ' , '\177 => {
					-- space, the remaining control characters, and DEL all separate words
					inWord ¬ FALSE;
					};
				ENDCASE => {
					-- note that apostrophes are not considered to start words, but they may be included
					alphanumeric: BOOL ¬ (c IN ['a..'z] OR c IN ['A..'Z] OR c IN ['0..'9]);
					IF inWord
						THEN {
							-- see if we're still in the word
							IF tokenMode AND NOT (alphanumeric OR c = '') THEN inWord ¬ FALSE;
							}
						ELSE {
							-- see if we should start a new word
							IF NOT tokenMode OR alphanumeric THEN {
								words ¬ words.SUCC;
								inWord ¬ TRUE;
								};
							};
					};
			afterReturn ¬ (c = '\r);
			ENDLOOP;
		EXITS EOS => NULL;	-- normal termination: the whole stream has been consumed
		};
	-- count a final line not terminated by CR or LF
	IF characters > 0 AND NOT (c = '\r OR c = '\l) THEN lines ¬ lines.SUCC;
	s.Close[];
	RETURN[lines, words, characters];
	};
CountRope: PUBLIC PROC [r: ROPE, tokenMode: BOOL ¬ FALSE]
	RETURNS [lines, words, characters: INT ¬ 0] ~ {
	-- Counts the lines, words and characters of the rope r.
	-- The rope is wrapped in an input stream and handed to CountStream, which does
	-- the counting; tokenMode is passed through unchanged.
	source: STREAM ¬ IO.RIS[rope: r, oldStream: NIL];
	[lines, words, characters] ¬ CountStream[s: source, tokenMode: tokenMode];
	};
CountFile: PUBLIC PROC [file: ROPE, tokenMode: BOOL ¬ FALSE]
	RETURNS [lines, words, characters: INT ¬ 0] ~ {
	-- Opens the named file for reading and counts its contents with CountStream.
	-- If FS.StreamOpen raises FS.Error, all three results are returned as -1.
	opened: BOOL ¬ TRUE;
	input: STREAM;
	input ¬ FS.StreamOpen[file, $read
		! FS.Error => {opened ¬ FALSE; CONTINUE}];
	IF NOT opened THEN RETURN[-1, -1, -1];
	[lines, words, characters] ¬ CountStream[input, tokenMode];
	};
ReplaceAll: PROC [rope, old, new: ROPE, case: BOOL ¬ TRUE, pos: INT ¬ 0]
	RETURNS [ROPE] ~ {
	-- Replace all occurrences of "old" with "new" in "rope" starting at "pos".
	-- Do the match for "old" with "case".
	-- After each replacement the search resumes just past the inserted copy of "new",
	-- so text that was inserted is never searched again.
	lenOld: INT ¬ Rope.Length[old];
	lenNew: INT ¬ Rope.Length[new];
	-- There is nothing sensible to replace when "old" is empty; without this guard an
	-- empty pattern would presumably be found at every position and the loop below
	-- would never terminate.
	IF lenOld = 0 THEN RETURN[rope];
	DO
		foundIndex: INT ¬ Rope.Find[rope, old, pos, case];
		IF foundIndex < 0 THEN EXIT;	-- no further occurrence
		rope ¬ Rope.Replace[rope, foundIndex, lenOld, new];
		pos ¬ foundIndex + lenNew;
		ENDLOOP;
	RETURN[rope];
	};
WcCommand: Commander.CommandProc ~ {
	-- Commander command that counts lines, words and characters.
	-- Arguments beginning with "-" are option letters (l w c f s a t); every other
	-- argument is a file name pattern that is expanded with FS.EnumerateForNames.
	-- Output is written to cmd.out.  On a usage error a short explanation is written
	-- and the command returns $Failure with the usage text as msg.

	PutInfo: PROC [lines, words, characters: INT, label: ROPE] ~ {
		-- Writes one report row: the enabled counts (each formatted with " %7g"),
		-- then the label unless the -f option suppressed it, then a newline.
		IF showLines THEN ts.PutF1[" %7g", [integer[lines]]];
		IF showWords THEN ts.PutF1[" %7g", [integer[words]]];
		IF showCharacters THEN ts.PutF1[" %7g", [integer[characters]]];
		IF includeFileNames THEN ts.PutF1[" %g", [rope[label]]];
		ts.PutChar['\n];
		};

	AddOneFileName: FS.NameProc ~ {
		-- Enumeration callback: appends fullFName to the end of fileList and keeps
		-- the enumeration going.
		IF fileList = NIL
			THEN fileListTail ¬ fileList ¬ LIST[fullFName]
			ELSE fileListTail ¬ fileListTail.rest ¬ LIST[fullFName];
		filesFound ¬ filesFound.SUCC;
		RETURN[TRUE];
		};

	ProcessAllFiles: PROC [fileList: RopeList] ~ {
		-- Counts every file on fileList and, when -a was given or at least two files
		-- were counted, writes a final row with the totals.
		filesProcessed: INT ¬ 0;
		lines, words, characters: INT ¬ 0;
		totalLines, totalWords, totalCharacters: INT ¬ 0;
		FOR each: RopeList ¬ fileList, each.rest WHILE each # NIL DO
			[lines, words, characters] ¬ CountThisFile[each.first];
			-- CountFile reports -1 for a file it could not open; such a file must
			-- neither reduce the totals nor be counted as processed.
			IF lines >= 0 THEN {
				totalLines ¬ totalLines + lines;
				totalWords ¬ totalWords + words;
				totalCharacters ¬ totalCharacters + characters;
				filesProcessed ¬ filesProcessed.SUCC;
				};
			ENDLOOP;
		IF add OR filesProcessed >= 2 THEN PutInfo[totalLines, totalWords, totalCharacters, IO.PutFR["Total in %g %g.", [integer[filesProcessed]], [rope[IF filesProcessed = 1 THEN "file" ELSE "files"]]]];
		};

	CountThisFile: PROC [file: ROPE] RETURNS [lines, words, characters: INT ¬ 0] ~ {
		-- Counts one file.  Its row is written unless -a asked for totals only.
		-- A file that could not be opened is reported by name instead of as a row of -1s.
		[lines, words, characters] ¬ CountFile[file, tokenMode];
		IF lines < 0
			THEN ts.PutF1["could not open file: \"%g\"\n", [rope[file]]]
			ELSE IF NOT add THEN PutInfo[lines, words, characters, file];
		};

	CountTheSelection: PROC ~ {
		-- Counts the current selection (via WordCount.CountSelection) and writes its row.
		lines, words, characters: INT ¬ 0;
		[lines, words, characters] ¬ WordCount.CountSelection[tokenMode];
		PutInfo[lines, words, characters, "CurrentSelection"];
		};

	argv: CommanderOps.ArgumentVector ¬ CommanderOps.Parse[cmd];
	ts: STREAM ¬ cmd.out;
	showLines, showWords, showCharacters: BOOL ¬ FALSE;	-- -l, -w, -c
	add: BOOL ¬ FALSE;	-- -a: print only the total
	tokenMode: BOOL ¬ FALSE;	-- -t
	includeFileNames: BOOL ¬ TRUE;	-- cleared by -f
	currentSelection: BOOL ¬ FALSE;	-- -s
	i: INT ¬ 1;	-- index of the argument being examined; argv[0] is the command name
	filesFound: INT ¬ 0;	-- number of file names collected on fileList
	fileList, fileListTail: RopeList ¬ NIL;

	WHILE i < argv.argc DO
		arg: ROPE ¬ argv[i];
		IF Rope.Match[pattern: "-*", object: arg, case: TRUE]
			THEN {
				-- option argument: every character after the "-" is one option letter
				FOR j: INT IN [1..Rope.Length[arg]) DO
					c: CHAR ¬ Rope.Fetch[base: arg, index: j];
					SELECT c FROM
						'l => showLines ¬ TRUE;
						'w => showWords ¬ TRUE;
						'c => showCharacters ¬ TRUE;
						'f => includeFileNames ¬ FALSE;
						's => currentSelection ¬ TRUE;
						'a => add ¬ TRUE;
						't => tokenMode ¬ TRUE;
						ENDCASE => {
							ts.PutF1["unknown option: \"%g\"\n", [character[c]]];
							RETURN[result: $Failure, msg: ReplaceAll[usageRope, "toolname", argv[0]]];
							};
					ENDLOOP;
				}
			ELSE {
				-- file name pattern: collect every matching file name
				foundBefore: INT ¬ filesFound;
				-- a pattern that contains no "!" gets "!h" appended before enumeration
				IF Rope.Find[arg, "!"] < 0 THEN arg ¬ Rope.Concat[arg, "!h"];
				FS.EnumerateForNames[arg, AddOneFileName];
				IF filesFound = foundBefore THEN {
					ts.PutF1["pattern does not match any files: \"%g\"\n", [rope[arg]]];
					RETURN[result: $Failure, msg: ReplaceAll[usageRope, "toolname", argv[0]]];
					};
				};
		i ¬ i + 1;
		ENDLOOP;

	IF filesFound = 0 AND NOT currentSelection THEN {
		ts.PutRope["nothing to count\n"];
		RETURN[result: $Failure, msg: ReplaceAll[usageRope, "toolname", argv[0]]];
		};
	IF currentSelection AND filesFound > 0 THEN {
		ts.PutRope["can't specify current selection and file(s)\n"];
		RETURN[result: $Failure, msg: ReplaceAll[usageRope, "toolname", argv[0]]];
		};

	-- with none of -l, -w, -c given, all three counts are shown
	IF NOT (showLines OR showWords OR showCharacters)
		THEN showLines ¬ showWords ¬ showCharacters ¬ TRUE;

	IF currentSelection
		THEN CountTheSelection[]
		ELSE ProcessAllFiles[fileList];
	};
-- One-line usage summary.  The placeholder "toolname" is replaced (with ReplaceAll) by
-- the actual command name before the text is shown.
usageRope: ROPE ¬ "usage: toolname [-l] [-w] [-c] [-s] [-f] [-a] [-t] [file1] [file2] ... [filen]";
-- Full description of the command; registered below as part of its documentation.
-- It uses the same "toolname" placeholder as usageRope.
docRope: ROPE ¬ "toolname counts lines, words, and characters in the specified file(s), or in the current selection if the -s option is used. It also reports a total count if two or more files are counted or if the -a option is used specifying that the individual counts should be added together and only the total given.
toolname normally defines a word to be a string of characters delimited by white-space; that is spaces, tabs, newlines, or other control characters. The -t option defines words to be similar to cedar tokens.
Tioga documents are seen as \"simple\" documents (comments and formatting evaporate).
Lines are defined to be the number of CR, LF, CR/LF pairs encountered. If the last line is not terminated by a CR and/or LF it is counted as a line none the less.
toolname is very similar to the UNIX command wc.
OPTIONS
l Count lines.
w Count words.
c Count characters.
s Count the current selection. (Useful for a Commander button.)
f Don't include file names in output message.
a Individual counts should be added together and only the total given.
t Define word breaks to be similar to cedar tokens rather than white-space.
The default is -lwc (count lines, words, and characters).
When files are specified on the command line, their names
will be printed along with the counts (unless you use the -f switch).
EXAMPLE
% toolname *.mesa
482 2093 15353 []<>Users>Test.mesa!1
181 719 5164 []<>Users>Test2.mesa!15
663 2812 20517 Total in 2 files.
%
";
-- Module start code: register WcCommand under the name "WordCount".  The documentation
-- is the usage line, a blank line, and the description, with every "toolname"
-- replaced by "WordCount".
Commander.Register[key: "WordCount", proc: WcCommand, doc: ReplaceAll[Rope.Cat[usageRope, "\n\n", docRope], "toolname", "WordCount"]];
}.