-- WordCountImpl
-- Implements the counting procedures exported through the WordCount interface
-- (CountStream, CountRope, CountFile) and registers the "WordCount" Commander
-- command, which prints line, word and character counts for files or for the
-- current selection.

DIRECTORY
  Commander, CommanderOps, FS, IO, Rope, WordCount;

WordCountImpl: CEDAR MONITOR
  IMPORTS Commander, CommanderOps, FS, IO, Rope, WordCount
  EXPORTS WordCount
= {

  -- Type abbreviations

  ROPE: TYPE = Rope.ROPE;
  STREAM: TYPE = IO.STREAM;
  RopeList: TYPE = LIST OF ROPE;

  -- Exported counting procedures

  CountStream: PUBLIC PROC [s: STREAM, tokenMode: BOOL ¬ FALSE] RETURNS [lines, words, characters: INT ¬ 0] ~ {
    -- Reads s to end of stream, counting lines, words and characters, then closes s.
    --   s: the stream to consume; it is closed before returning.
    --   tokenMode: when FALSE a word is any run of characters above space (DEL excluded);
    --     when TRUE a word must start with a letter or digit and continues through
    --     letters, digits and the '' character.
    -- A line end is a CR, a LF, or a CR immediately followed by a LF (counted once).
    -- A non-empty stream whose last character is not CR or LF gets one extra line.
    IsAlphaNumeric: PROC [ch: CHAR] RETURNS [BOOL] ~ {
      RETURN[ch IN ['a..'z] OR ch IN ['A..'Z] OR ch IN ['0..'9]];
    };
    inWord: BOOL ¬ FALSE;
    prevWasCR: BOOL ¬ FALSE;  -- TRUE when the previously read character was a CR
    c: CHAR ¬ '\000;  -- after the loop this holds the last character actually read
    {
      ENABLE IO.EndOfStream => GOTO EOS;
      DO
        c ¬ IO.GetChar[s];
        characters ¬ characters.SUCC;
        SELECT c FROM
          '\r => {
            lines ¬ lines.SUCC;
            inWord ¬ FALSE;
          };
          '\l => {
            -- a LF directly after a CR belongs to the same line end; do not count it twice
            IF NOT prevWasCR THEN lines ¬ lines.SUCC;
            inWord ¬ FALSE;
          };
          <= ' , '\177 => {
            -- space, any other control character, or DEL: always ends a word
            inWord ¬ FALSE;
          };
          ENDCASE => {
            IF inWord THEN {
              -- see if we're still in the word
              IF tokenMode AND NOT (IsAlphaNumeric[c] OR c = '') THEN inWord ¬ FALSE;
            }
            ELSE {
              -- see if we should start a new word
              IF NOT tokenMode OR IsAlphaNumeric[c] THEN {
                words ¬ words.SUCC;
                inWord ¬ TRUE;
              };
            };
          };
        prevWasCR ¬ (c = '\r);
      ENDLOOP;
      EXITS EOS => NULL;
    };
    -- an unterminated final line still counts as a line
    IF characters > 0 AND NOT (c = '\r OR c = '\l) THEN lines ¬ lines.SUCC;
    s.Close[];
    RETURN[lines, words, characters];
  };

  CountRope: PUBLIC PROC [r: ROPE, tokenMode: BOOL ¬ FALSE] RETURNS [lines, words, characters: INT ¬ 0] ~ {
    -- Counts the contents of r by wrapping it in an input stream and calling CountStream.
    [lines, words, characters] ¬ CountStream[IO.RIS[rope: r, oldStream: NIL], tokenMode];
  };

  CountFile: PUBLIC PROC [file: ROPE, tokenMode: BOOL ¬ FALSE] RETURNS [lines, words, characters: INT ¬ 0] ~ {
    -- Opens file for reading and counts its contents with CountStream.
    -- If FS.StreamOpen raises FS.Error, all three results are -1.
    problems: BOOL ¬ FALSE;
    fs: STREAM;
    fs ¬ FS.StreamOpen[file, $read !
      FS.Error => {problems ¬ TRUE; CONTINUE}
    ];
    IF problems
      THEN lines ¬ words ¬ characters ¬ -1
      ELSE [lines, words, characters] ¬ CountStream[fs, tokenMode];
  };

  -- Private helpers

  ReplaceAll: PROC [rope, old, new: ROPE, case: BOOL ¬ TRUE, pos: INT ¬ 0] RETURNS [ROPE] = {
    -- Returns rope with every occurrence of old at or after pos replaced by new.
    -- Matching honours case. Text inserted by a replacement is not rescanned.
    lenOld: INT ¬ Rope.Length[old];
    lenNew: INT ¬ Rope.Length[new];
    -- an empty search rope would be re-found after every insertion and the loop
    -- would never terminate, so treat it as nothing to replace
    IF lenOld = 0 THEN RETURN[rope];
    DO
      foundIndex: INT ¬ Rope.Find[rope, old, pos, case];
      IF foundIndex = -1 THEN EXIT;
      rope ¬ Rope.Replace[rope, foundIndex, lenOld, new];
      pos ¬ foundIndex + lenNew;  -- resume the search just past the inserted text
    ENDLOOP;
    RETURN[rope];
  };

  -- The Commander command

  WcCommand: Commander.CommandProc ~ {
    -- Parses the command line, collects the files matching each pattern argument,
    -- and prints the requested counts on cmd.out. Returns $Failure together with
    -- the usage message when the arguments are unusable.

    PutInfo: PROC [lines, words, characters: INT, label: ROPE] ~ {
      -- Prints one output row containing only the columns selected by the switches.
      IF showLines THEN ts.PutF1[" %7g", [integer[lines]]];
      IF showWords THEN ts.PutF1[" %7g", [integer[words]]];
      IF showCharacters THEN ts.PutF1[" %7g", [integer[characters]]];
      IF includeFileNames THEN ts.PutF1[" %g", [rope[label]]];
      ts.PutChar['\n];
    };

    UsageMsg: PROC RETURNS [ROPE] ~ {
      -- The usage text with "toolname" replaced by the name the command was invoked under.
      RETURN[ReplaceAll[usageRope, "toolname", argv[0]]];
    };

    AddOneFileName: FS.NameProc ~ {
      -- Enumeration callback: appends fullFName to fileList and counts it.
      IF fileList = NIL
        THEN fileListTail ¬ fileList ¬ LIST[fullFName]
        ELSE fileListTail ¬ fileListTail.rest ¬ LIST[fullFName];
      nFiles ¬ nFiles.SUCC;
      RETURN[TRUE];
    };

    ProcessAllFiles: PROC [files: RopeList] ~ {
      -- Counts every file in files and prints a total row when -a was given or
      -- when at least two files were counted successfully.
      filesProcessed: INT ¬ 0;
      totalLines, totalWords, totalCharacters: INT ¬ 0;
      FOR each: RopeList ¬ files, each.rest UNTIL each = NIL DO
        lines, words, characters: INT ¬ 0;
        [lines, words, characters] ¬ CountThisFile[each.first];
        -- CountFile signals an unreadable file with -1 counts; keep those out of the totals
        IF characters >= 0 THEN {
          totalLines ¬ totalLines + lines;
          totalWords ¬ totalWords + words;
          totalCharacters ¬ totalCharacters + characters;
          filesProcessed ¬ filesProcessed.SUCC;
        };
      ENDLOOP;
      IF add OR filesProcessed >= 2 THEN
        PutInfo[totalLines, totalWords, totalCharacters,
          IO.PutFR["Total in %g %g.", [integer[filesProcessed]], [rope[IF filesProcessed = 1 THEN "file" ELSE "files"]]]];
    };

    CountThisFile: PROC [file: ROPE] RETURNS [lines, words, characters: INT ¬ 0] ~ {
      -- Counts one file. Prints its row unless -a was given; if the file could not
      -- be opened, prints a complaint instead and returns the -1 counts unchanged.
      [lines, words, characters] ¬ CountFile[file, tokenMode];
      IF characters < 0
        THEN ts.PutF1["could not open file: \"%g\"\n", [rope[file]]]
        ELSE IF NOT add THEN PutInfo[lines, words, characters, file];
    };

    CountTheSelection: PROC ~ {
      -- Counts the current selection via WordCount.CountSelection and prints its row.
      lines, words, characters: INT ¬ 0;
      [lines, words, characters] ¬ WordCount.CountSelection[tokenMode];
      PutInfo[lines, words, characters, "CurrentSelection"];
    };

    argv: CommanderOps.ArgumentVector ¬ CommanderOps.Parse[cmd];
    ts: STREAM ¬ cmd.out;
    showLines, showWords, showCharacters: BOOL ¬ FALSE;
    add: BOOL ¬ FALSE;
    tokenMode: BOOL ¬ FALSE;
    includeFileNames: BOOL ¬ TRUE;
    nFiles: INT ¬ 0;  -- number of file names collected so far
    fileList, fileListTail: RopeList ¬ NIL;
    currentSelection: BOOL ¬ FALSE;

    FOR i: INT IN [1..argv.argc) DO
      arg: ROPE ¬ argv[i];
      SELECT TRUE FROM
        Rope.Match[pattern: "-*", object: arg, case: TRUE] => {
          -- a switch argument: every character after the dash is one option letter
          FOR k: INT IN [1..Rope.Length[arg]) DO
            c: CHAR ¬ Rope.Fetch[base: arg, index: k];
            SELECT c FROM
              'l => showLines ¬ TRUE;
              'w => showWords ¬ TRUE;
              'c => showCharacters ¬ TRUE;
              'f => includeFileNames ¬ FALSE;
              's => currentSelection ¬ TRUE;
              'a => add ¬ TRUE;
              't => tokenMode ¬ TRUE;
              ENDCASE => {
                ts.PutF1["unknown option: \"%g\"\n", [character[c]]];
                RETURN[result: $Failure, msg: UsageMsg[]];
              };
          ENDLOOP;
        };
        ENDCASE => {
          -- a file name pattern: collect every file it matches
          before: INT ¬ nFiles;
          enumFailed: BOOL ¬ FALSE;
          -- patterns without an explicit version part get "!h" appended
          IF Rope.Find[arg, "!"] < 0 THEN arg ¬ Rope.Concat[arg, "!h"];
          FS.EnumerateForNames[arg, AddOneFileName !
            FS.Error => {enumFailed ¬ TRUE; CONTINUE}
          ];
          IF enumFailed THEN {
            ts.PutF1["could not enumerate files for pattern: \"%g\"\n", [rope[arg]]];
            RETURN[result: $Failure, msg: UsageMsg[]];
          };
          IF before = nFiles THEN {
            ts.PutF1["pattern does not match any files: \"%g\"\n", [rope[arg]]];
            RETURN[result: $Failure, msg: UsageMsg[]];
          };
        };
    ENDLOOP;

    IF nFiles = 0 AND NOT currentSelection THEN {
      ts.PutRope["nothing to count\n"];
      RETURN[result: $Failure, msg: UsageMsg[]];
    };
    IF currentSelection AND nFiles > 0 THEN {
      ts.PutRope["can't specify current selection and file(s)\n"];
      RETURN[result: $Failure, msg: UsageMsg[]];
    };

    -- with no column switch given, show all three columns
    IF NOT (showLines OR showWords OR showCharacters) THEN
      showLines ¬ showWords ¬ showCharacters ¬ TRUE;

    IF currentSelection
      THEN CountTheSelection[]
      ELSE ProcessAllFiles[fileList];
  };

  -- Usage and documentation text; "toolname" is replaced by the command name before use.

  usageRope: ROPE ¬ "usage: toolname [-l] [-w] [-c] [-s] [-f] [-a] [-t] [file1] [file2] ... 
[filen]";

  docRope: ROPE ¬ "toolname counts lines, words, and characters in the specified file(s), or in the current selection if the -s option is used. It also reports a total count if two or more files are counted or if the -a option is used specifying that the individual counts should be added together and only the total given. toolname normally defines a word to be a string of characters delimited by white-space; that is spaces, tabs, newlines, or other control characters. The -t option defines words to be similar to cedar tokens. Tioga documents are seen as \"simple\" documents (comments and formatting evaporate). Lines are defined to be the number of CR, LF, CR/LF pairs encountered. If the last line is not terminated by a CR and/or LF it is counted as a line none the less. toolname is very similar to the UNIX command wc. OPTIONS l Count lines. w Count words. c Count characters. s Count the current selection. (Useful for a Commander button.) f Don't include file names in output message. a Individual counts should be added together and only the total given. t Define word breaks to be similar to cedar tokens rather than white-space. The default is -lwc (count lines, words, and characters). When files are specified on the command line, their names will be printed along with the counts (unless you use the -f switch). EXAMPLE % toolname *.mesa 482 2093 15353 []<>Users>Test.mesa!1 181 719 5164 []<>Users>Test2.mesa!15 663 2812 20517 Total in 2 files. % ";

  -- Module initialisation: register the command under the name "WordCount".

  Commander.Register[key: "WordCount", proc: WcCommand, doc: ReplaceAll[Rope.Cat[usageRope, "\n\n", docRope], "toolname", "WordCount"]];

}.