-- SoftwareTools.Mesa -- last edited by Satterthwaite, 23-Sep-81 12:21:52 SoftwareTools: PROGRAM = { -- global definitions -- characters and strings MAXCHAR: NAT = LAST[CHARACTER] - 0c; MAXSTR: NAT = 100; character: TYPE = [-1..MAXCHAR]; string: TYPE = PACKED ARRAY [1..MAXSTR] OF character; ENDFILE: character = -1; ENDSTR: character = 0; BACKSPACE: character = 9; TAB: character = 9; NEWLINE: character = 10; BLANK: character = 32; ESCAPE: character = ord['@]; -- file descriptors and io-related stuff MAXOPEN: NAT = 10; filedesc: TYPE = [0..MAXOPEN]; IOERROR: filedesc = 0; STDIN: filedesc = 1; STDOUT: filedesc = 2; STDERR: filedesc = 3; IOREAD: INTEGER = 1; IOWRITE: INTEGER = 2; IOBOTH: INTEGER = 3; -- standard (Pascal) procedures ord: PROC [c: CHARACTER] RETURNS [character] = INLINE {RETURN [c - 0c]}; -- primitives error: PROC [STRING]; message: PROC [STRING]; setstring: PROC [s: POINTER TO string, text: STRING]; open: PROC [name: POINTER TO string, mode: INTEGER] RETURNS [filedesc]; create: PROC [name: POINTER TO string, mode: INTEGER] RETURNS [filedesc]; close: PROC [fd: filedesc]; remove: PROC [s: POINTER TO string]; getc: PROC [c: POINTER TO character] RETURNS [character]; getcf: PROC [c: POINTER TO character, fd: filedesc] RETURNS [character]; getline: PROC [s: POINTER TO string, fd: filedesc, maxsize: INTEGER] RETURNS [BOOLEAN]; putc: PROC [c: character]; putcf: PROC [c: character, fd: filedesc]; putstr: PROC [s: POINTER TO string, fd: filedesc]; getarg: PROC [n: INTEGER, str: POINTER TO string, maxsize: INTEGER] RETURNS [BOOLEAN]; nargs: PROC RETURNS [INTEGER]; -- utilities -- addstr: put c in outset[j] if it fits, increment j addstr: PROC [c: character, outset: POINTER TO string, j: POINTER TO INTEGER, maxset: INTEGER] RETURNS [added: BOOLEAN] = { IF (j↑ > maxset) THEN added ← FALSE ELSE { outset[j↑] ← c; j↑ ← j↑ + 1; added ← TRUE}; RETURN}; -- equal: test two strings for equality equal: PROC [str1, str2: POINTER TO string] RETURNS [BOOLEAN] = { i: INTEGER ← 1; WHILE (str1[i] = str2[i]) AND (str1[i] # ENDSTR) DO i ← i + 1 ENDLOOP; RETURN [str1[i] = str2[i]]}; -- esc: map s[i↑] into escaped character, increment i↑ esc: PROC [s: POINTER TO string, i: POINTER TO INTEGER] RETURNS [c: character] = { IF (s[i↑] # ESCAPE) THEN c ← s[i↑] ELSE IF (s[i↑+1] = ENDSTR) THEN -- @ not special at end c ← ESCAPE ELSE { i↑ ← i↑ + 1; IF (s[i↑] = ord['n]) THEN c ← NEWLINE ELSE IF (s[i↑] = ord['t]) THEN c ← TAB ELSE c ← s[i↑]}; RETURN}; -- index: find position of character c in string s index: PROC [s: POINTER TO string, c: character] RETURNS [INTEGER] = { i: INTEGER ← 1; WHILE (s[i] # c) AND (s[i] # ENDSTR) DO i ← i + 1 ENDLOOP; RETURN [IF (s[i] = ENDSTR) THEN 0 ELSE i]}; -- isalphanum: true if c is a letter or digit isalphanum: PROC [c: character] RETURNS [BOOLEAN] = { RETURN [ (c IN [ord['a] .. ord['z]]) OR (c IN [ord['A] .. ord['Z]]) OR (c IN [ord['0] .. ord['9]])]}; -- isdigit: true if c is a digit isdigit: PROC [c: character] RETURNS [BOOLEAN] = INLINE { RETURN [c IN [ord['0] .. ord['9]]]}; -- isupper: true if c is an upper case letter isupper: PROC [c: character] RETURNS [BOOLEAN] = INLINE { RETURN [c IN [ord['A] .. ord['Z]]]}; -- itoc: convert integer n to char string in s[i] ... itoc: PROC [n: INTEGER, s: POINTER TO string, i: INTEGER] RETURNS [next: INTEGER] = { IF (n < 0) THEN { s[i] ← ord['-]; next ← itoc[-n, s, i+1]} ELSE { IF (n >= 10) THEN i ← itoc[n/10, s, i]; s[i] ← n MOD 10 + ord['0]; s[i+1] ← ENDSTR; next ← i+1}; RETURN}; -- length: compute length of string length: PROC [s: POINTER TO string] RETURNS [INTEGER] = { n: INTEGER ← 1; WHILE (s[n] # ENDSTR) DO n ← n + 1 ENDLOOP; RETURN [n - 1]}; -- mustcreate: create file or die mustcreate: PROC [name: POINTER TO string, mode: INTEGER] RETURNS [fd: filedesc] = { fd ← create[name, mode]; IF (fd = IOERROR) THEN { putstr[name, STDERR]; error[": can't create file"L]}; RETURN}; -- mustopen: open file or die mustopen: PROC [name: POINTER TO string, mode: INTEGER] RETURNS [fd: filedesc] = { fd ← open[name, mode]; IF (fd = IOERROR) THEN { putstr[name, STDERR]; error[": can't open file"L]}; RETURN}; -- scopy: copy string ar st src[i] to dest[j] scopy: PROC [src: POINTER TO string, i: INTEGER, dest: POINTER TO string, j: INTEGER] = { WHILE (src[i] # ENDSTR) DO dest[j] ← src[i]; i ← i + 1; j ← j + 1; ENDLOOP; dest[j] ← ENDSTR}; -- ctoi: convert string at s[i] to integer, increment i ctoi: PROC [s: POINTER TO string, i: POINTER TO INTEGER] RETURNS [INTEGER] = { n, sign: INTEGER; WHILE (s[i↑] = BLANK) OR (s[i↑] = TAB) DO i↑ ← i↑ + 1 ENDLOOP; sign ← IF s[i↑] = ord['-] THEN -1 ELSE 1; IF (s[i↑] = ord['+]) OR (s[i↑] = ord['-]) THEN i↑ ← i↑ + 1; n ← 0; WHILE (isdigit[s[i↑]]) DO n ← 10*n + (s[i↑] - ord['0]); i↑ ← i↑ + 1; ENDLOOP; RETURN [sign * n]}; -- fcopy: copy file fin to fout fcopy: PROC [fin, fout: filedesc] = { c: character; WHILE (getcf[@c, fin] # ENDFILE) DO putcf[c, fout] ENDLOOP}; -- putdec: put decimal integer n in field width >= w putdec: PROC [n, w: INTEGER] = { s: string; nd: INTEGER = itoc[n, @s, 1]; FOR i: INTEGER IN [nd .. w] DO putc[BLANK] ENDLOOP; FOR i: INTEGER IN [1 .. nd-1] DO putc[s[i]] ENDLOOP}; -- tools -- copy: copy standard input to standard output copy: PROC = { c: character; WHILE (getc[@c] # ENDFILE) DO putc[c] ENDLOOP}; -- charcount: counts characters in standard input charcount: PROC = { nc: INTEGER ← 0; c: character; WHILE (getc[@c] # ENDFILE) DO nc ← nc + 1 ENDLOOP; putdec[nc, 1]; putc[NEWLINE]}; -- linecount: counts lines in standard input linecount: PROC = { nl: INTEGER ← 0; c: character; WHILE (getc[@c] # ENDFILE) DO IF (c = NEWLINE) THEN nl ← nl + 1; ENDLOOP; putdec[nl, 1]; putc[NEWLINE]}; -- wordcount: count words in standard input wordcount: PROC = { nw: INTEGER ← 0; inword: BOOLEAN ← FALSE; c: character; WHILE (getc[@c] # ENDFILE) DO IF (c = BLANK) OR (c = NEWLINE) OR (c = TAB) THEN inword ← FALSE ELSE IF (~inword) THEN { inword ← TRUE; nw ← nw + 1} ENDLOOP; putdec[nw, 1]; putc[NEWLINE]}; -- shared definitions MAXLINE: NAT = 1000; tabtype: TYPE = PACKED ARRAY [1..MAXLINE] OF BOOLEAN; TABSPACE: NAT = 8; tabpos: PROC [col: INTEGER, tabstops: POINTER TO tabtype] RETURNS [BOOLEAN] = { -- return true if col is a tab stop RETURN [IF (col > MAXLINE) THEN TRUE ELSE tabstops[col]]}; settabs: PROC [tabstops: POINTER TO tabtype] = { -- set initial tab stops FOR i: INTEGER IN [1 .. MAXLINE] DO tabstops[i] ← (i MOD TABSPACE = 1) ENDLOOP}; -- detab: convert tabs to equivalent number of blanks detab: PROC = { c: character; col: INTEGER ← 1; tabstops: tabtype; settabs[@tabstops]; -- set initial tab stops WHILE (getc[@c] # ENDFILE) DO IF (c = TAB) THEN DO putc[BLANK]; col ← col + 1; IF tabpos[col, @tabstops] THEN EXIT; ENDLOOP ELSE IF (c = NEWLINE) THEN { putc[NEWLINE]; col ← 1} ELSE { putc[c]; col ← col + 1} ENDLOOP}; -- entab: replace blanks by tabs and blanks entab: PROC = { col: INTEGER ← 1; tabstops: tabtype; settabs[@tabstops]; DO newcol: INTEGER ← col; c: character; WHILE (getc[@c] = BLANK) DO -- collect blanks newcol ← newcol + 1; IF (tabpos[newcol, @tabstops]) THEN { putc[TAB]; col ← newcol}; ENDLOOP; WHILE (col < newcol) DO -- output leftover blanks putc[BLANK]; col ← col + 1; ENDLOOP; IF (c # ENDFILE) THEN { putc[c]; IF (c = NEWLINE) THEN col ← 1 ELSE col ← col + 1}; IF (c = ENDFILE) THEN EXIT; ENDLOOP}; -- overstrike: convert backspaces into multiple lines overstrike: PROC = { SKIP: character = BLANK; NOSKIP: character = ord['+]; c: character; col: INTEGER ← 1; DO newcol: INTEGER ← col; WHILE (getc[@c] = BACKSPACE) DO -- eat backspaces newcol ← MAX[newcol-1, 1] ENDLOOP; IF (newcol < col) THEN { putc[NEWLINE]; -- start overstrike line putc[NOSKIP]; FOR i: INTEGER IN [1..newcol) DO putc[BLANK] ENDLOOP; col ← newcol} ELSE IF (col = 1) AND (c # ENDFILE) THEN putc[SKIP]; -- normal line -- ELSE middle of line IF (c = ENDFILE) THEN EXIT; putc[c]; -- normal character col ← IF (c = NEWLINE) THEN 1 ELSE col + 1 ENDLOOP}; WARNING: character = ord['~]; -- compress: compress standard input compress: PROC = { c, lastc: character; n: INTEGER; putrep: PROC [n: INTEGER, c: character] = { -- put out representation of run of n 'c's MAXREP: NAT = 26; -- assuming ['A..'Z] THRESH: NAT = 4; WHILE (n >= THRESH) OR ((c = WARNING) AND (n > 0)) DO putc[WARNING]; putc[MIN[n, MAXREP] - 1 + ord['A]]; putc[c]; n ← n - MAXREP ENDLOOP; FOR n DECREASING IN [1..n] DO putc[c] ENDLOOP}; n ← 1; lastc ← getc[@lastc]; WHILE (lastc # ENDFILE) DO IF (getc[@c] = ENDFILE) THEN { IF (n > 1) OR (lastc = WARNING) THEN putrep[n, lastc] ELSE putc[lastc]} ELSE IF (c = lastc) THEN n ← n + 1 ELSE IF (n > 1) OR (lastc = WARNING) THEN { putrep[n, lastc]; n ← 1} ELSE putc[lastc]; lastc ← c ENDLOOP}; -- expand: uncompress standard input expand: PROC = { c: character; n: INTEGER; WHILE (getc[@c] # ENDFILE) DO IF (c # WARNING) THEN putc[c] ELSE IF (isupper[getc[@c]]) THEN { n ← c - ord['A] + 1; IF (getc[@c] # ENDFILE) THEN FOR n DECREASING IN [1..n] DO putc[c] ENDLOOP ELSE { putc[WARNING]; putc[n - 1 + ord['A]]}} ELSE { putc[WARNING]; IF (c # ENDFILE) THEN putc[c]} ENDLOOP}; -- echo: echo command line arguments to output echo: PROC = { argstr: string; i: INTEGER ← 1; WHILE (getarg[i, @argstr, MAXSTR]) DO IF (i > 1) THEN putc[BLANK]; FOR j: INTEGER IN [1 .. length[@argstr]] DO putc[argstr[j]] ENDLOOP; i ← i + 1 ENDLOOP; IF (i > 1) THEN putc[NEWLINE]}; -- translit: map characters translit: PROC = { NEGATE: character = ord['↑]; arg, fromset, toset: string; c: character; i, lastto: [0..MAXSTR]; allbut, squash: BOOLEAN; makeset: PROC [inset: POINTER TO string, k: INTEGER, outset: POINTER TO string, maxset: INTEGER] RETURNS [BOOLEAN] = { -- makeset: make set from inset[k] in outset dodash: PROC [delim: character, src: POINTER TO string, i: POINTER TO INTEGER, dest: POINTER TO string, j: POINTER TO INTEGER, maxset: INTEGER] = { -- dodash: expand set at src[i↑] into dest[j↑], stop at delim DASH: character = ord['-]; WHILE (src[i↑] # delim) AND (src[i↑] # ENDSTR) DO IF (src[i↑] = ESCAPE) THEN [] ← addstr[esc[src, i], dest, j, maxset] ELSE IF (src[i↑] # DASH) THEN [] ← addstr[src[i↑], dest, j, maxset] ELSE IF (j↑ <= 1) OR (src[i↑+1] = ENDSTR) THEN [] ← addstr[DASH, dest, j, maxset] -- literal - ELSE IF (isalphanum[src[i↑-1]]) AND (isalphanum[src[i↑+1]]) AND (src[i↑-1] <= src[i↑+1]) THEN { FOR k: character IN (src[i↑-1] .. src[i↑+1]] DO [] ← addstr[k, dest, j, maxset] ENDLOOP; i↑ ← i↑ + 1} ELSE [] ← addstr[DASH, dest, j, maxset]; i↑ ← i↑ + 1 ENDLOOP}; j: INTEGER ← 1; dodash[ENDSTR, inset, @k, outset, @j, maxset]; RETURN [addstr[ENDSTR, outset, @j, maxset]]}; xindex: PROC [inset: POINTER TO string, c: character, allbut: BOOLEAN, lastto: INTEGER] RETURNS [INTEGER] = { -- xindex: conditionally invert value from index RETURN [ IF (c = ENDFILE) THEN 0 ELSE IF (~allbut) THEN index[inset, c] ELSE IF (index[inset, c] > 0) THEN 0 ELSE lastto + 1]}; IF (~getarg[1, @arg, MAXSTR]) THEN error["usage: translit from to"L]; allbut ← (arg[1] = NEGATE); i ← IF (allbut) THEN 2 ELSE 1; IF (~makeset[@arg, i, @fromset, MAXSTR]) THEN error["translit: ""from"" set too large"L]; IF (~getarg[2, @arg, MAXSTR]) THEN toset[1] ← ENDSTR ELSE IF (~makeset[@arg, 1, @toset, MAXSTR]) THEN error["translit: ""to"" set too large"L] ELSE IF (length[@fromset] < length[@toset]) THEN error["translit: ""from"" shorter than ""to"""L]; lastto ← length[@toset]; squash ← (length[@fromset] > lastto) OR (allbut); DO i ← xindex[@fromset, getc[@c], allbut, lastto]; IF (squash) AND (i>=lastto) AND (lastto>0) THEN { putc[toset[lastto]]; DO i ← xindex[@fromset, getc[@c], allbut, lastto]; IF (i < lastto) THEN EXIT; ENDLOOP}; IF (c = ENDFILE) THEN EXIT; IF (i > 0) AND (lastto > 0) THEN -- translate putc[toset[i]] ELSE IF (i = 0) THEN -- copy putc[c] -- ELSE delete -- ENDLOOP}; -- compare: compare two files for equality compare: PROC = { line1, line2: string; arg1, arg2: string; lineno: INTEGER ← 0; infile1, infile2: filedesc; f1, f2: BOOLEAN; diffmsg: PROC [n: INTEGER, line1, line2: POINTER TO string] = { -- diffmsg: print line numbers and differing lines putdec[n, 1]; putc[ord[':]]; putc[NEWLINE]; putstr[line1, STDOUT]; putstr[line2, STDOUT]}; IF (~getarg[1, @arg1, MAXSTR]) OR (~getarg[2, @arg2, MAXSTR]) THEN error["usage: compare file1 file2"L]; infile1 ← mustopen[@arg1, IOREAD]; infile2 ← mustopen[@arg2, IOREAD]; DO lineno ← lineno + 1; f1 ← getline[@line1, infile1, MAXSTR]; f2 ← getline[@line2, infile2, MAXSTR]; IF (~f1 OR ~f2) THEN EXIT; IF (~equal[@line1, @line2]) THEN diffmsg[lineno, @line1, @line2] ENDLOOP; IF (f2 AND ~f1) THEN message["compare: end of file on file1"L] ELSE IF (f1 AND ~f2) THEN message["compare: end of file on file2"L]}; -- include: replace #include "file" by contents of file include: PROC = { incl: string; -- value is '#include' finclude: PROC [f: filedesc] = { -- finclude: include file desc f line, str: string; loc, i: INTEGER; f1: filedesc; getword: PROC [s: POINTER TO string, i: INTEGER, out: POINTER TO string] RETURNS [INTEGER] = { j: INTEGER ← 1; WHILE (s[i] = BLANK) OR (s[i] = TAB) OR (s[i] = NEWLINE) DO i ← i + 1 ENDLOOP; WHILE (~((s[i] = ENDSTR) OR (s[i] = BLANK) OR (s[i] = TAB) OR (s[i] = NEWLINE))) DO out[j] ← s[i]; i ← i + 1; j ← j + 1 ENDLOOP; out[j] ← ENDSTR; RETURN[IF (s[i] = ENDSTR) THEN 0 ELSE i]}; WHILE (getline[@line, f, MAXSTR]) DO loc ← getword[@line, 1, @str]; IF (~equal[@str, @incl]) THEN putstr[@line, STDOUT] ELSE { loc ← getword[@line, loc, @str]; str[length[@str]] ← ENDSTR; -- remove quotes FOR i IN [1 .. length[@str]] DO str[i] ← str[i+1] ENDLOOP; f1 ← mustopen[@str, IOREAD]; finclude[f1]; close[f1]} ENDLOOP}; setstring[@incl, "#include"L]; finclude[STDIN]}; -- concat: concatenate files onto standard input concat: PROC = { fd: filedesc; s: string; FOR i: INTEGER IN [1 .. nargs[]] DO [] ← getarg[i, @s, MAXSTR]; fd ← mustopen[@s, IOREAD]; fcopy[fd, STDOUT]; close[fd] ENDLOOP}; -- print: (default input STDIN) print files with headings print: PROC = { name: string; null: string; -- value '' fin: filedesc; fprint: PROC [name: POINTER TO string, fin: filedesc] = { -- fprint: print file "name" from fin MARGIN1: NAT = 2; MARGIN2: NAT = 2; BOTTOM: NAT = 64; PAGELEN: NAT = 66; line: string; lineno, pageno: INTEGER; skip: PROC [n: INTEGER] = { -- skip: output n blank lines FOR i: INTEGER IN [1..n] DO putc[NEWLINE] ENDLOOP}; head: PROC [name: POINTER TO string, pageno: INTEGER] = { -- head: print top of page header page: string; -- set to ' Page ' setstring[@page, " Page "L]; putstr[name, STDOUT]; putstr[@page, STDOUT]; putdec[pageno, 1]; putc[NEWLINE]}; pageno ← 1; skip[MARGIN1]; head[name, pageno]; skip[MARGIN2]; lineno ← MARGIN1 + MARGIN2 + 1; WHILE (getline[@line, fin, MAXSTR]) DO IF (lineno = 0) THEN { skip[MARGIN1]; pageno ← pageno + 1; head[name, pageno]; skip[MARGIN2]; lineno ← MARGIN1 + MARGIN2 + 1}; putstr[@line, STDOUT]; lineno ← lineno + 1; IF (lineno >= BOTTOM) THEN { skip[PAGELEN-lineno]; lineno ← 0} ENDLOOP; IF (lineno > 0) THEN skip[PAGELEN-lineno]}; setstring[@null, ""L]; IF (nargs[] = 0) THEN fprint[@null, STDIN] ELSE FOR i: INTEGER IN [1 .. nargs[]] DO [] ← getarg[i, @name, MAXSTR]; fin ← mustopen[@name, IOREAD]; fprint[@name, fin]; close[fin] ENDLOOP}; -- makecopy: copy one file to another makecopy: PROC = { inname, outname: string; fin, fout: filedesc; IF (~getarg[1, @inname, MAXSTR]) OR (~getarg[2, @outname, MAXSTR]) THEN error["usage: makecopy old new"L]; fin ← mustopen[@inname, IOREAD]; fout ← mustcreate[@outname, IOWRITE]; fcopy[fin, fout]; close[fin]; close[fout]}; -- sort: sort text lines in memory inmemsort: PROC = { MAXCHARS: NAT = 10000; -- maximum # of text characters MAXLINES: NAT = 300; -- maximum # of lines charbuf: TYPE = ARRAY [1..MAXCHARS] OF character; charpos: TYPE = [1..MAXCHARS]; posbuf: TYPE = ARRAY [1..MAXLINES] OF charpos; pos: TYPE = [0..MAXLINES]; linebuf: charbuf; linepos: posbuf; nlines: pos; gtext: PROC [linepos: POINTER TO posbuf, nlines: POINTER TO pos, linebuf: POINTER TO charbuf, infile: filedesc] RETURNS [BOOLEAN] = { -- gtext: get text lines into linebuf len: INTEGER; nextpos: charpos ← 1; temp: string; done: BOOLEAN; nlines↑ ← 0; DO done ← (~getline[@temp, infile, MAXSTR]); IF (~done) THEN { nlines↑ ← nlines↑ + 1; linepos[nlines↑] ← nextpos; len ← length[@temp]; FOR i: INTEGER IN [1..len] DO linebuf[nextpos+i-1] ← temp[i] ENDLOOP; linebuf[nextpos+len] ← ENDSTR; nextpos ← nextpos + len + 1}; -- 1 for ENDSTR IF (done) OR (nextpos >= MAXCHARS-MAXSTR) OR (nlines↑ >= MAXLINES) THEN EXIT ENDLOOP; RETURN [done]}; shell: PROC [linepos: POINTER TO posbuf, nlines: INTEGER, linebuf: POINTER TO charbuf] = { -- shell: ascending Shell sort for lines gap: INTEGER ← nlines/2; cmp: PROC [i, j: charpos, linebuf: POINTER TO charbuf] RETURNS [INTEGER] = INLINE { -- cmp: compare linebuf[i] with linebuf[j] WHILE (linebuf[i] = linebuf[j]) AND (linebuf[i] # ENDSTR) DO i ← i + 1; j ← j + 1 ENDLOOP; RETURN [ IF (linebuf[i] = linebuf[j]) THEN 0 ELSE IF (linebuf[i] = ENDSTR) THEN -1 -- 1st is shorter ELSE IF (linebuf[j] = ENDSTR) THEN 1 -- 2nd is shorter ELSE IF (linebuf[i] < linebuf[j]) THEN -1 ELSE 1]}; exchange: PROC [lp1, lp2: POINTER TO charpos] = INLINE { -- exchange: exchange linebuf[lp1] with linebuf[lp2] temp: charpos ← lp1↑; lp1↑ ← lp2↑; lp2↑ ← temp}; WHILE (gap > 0) DO FOR i: INTEGER IN [gap+1 .. nlines] DO j: INTEGER ← i - gap; WHILE (j > 0) DO jg: INTEGER ← j + gap; IF (cmp[linepos[j], linepos[jg], linebuf] <= 0) THEN j ← 0 -- force loop termination ELSE exchange[@linepos[j], @linepos[jg]]; j ← j - gap ENDLOOP; ENDLOOP; gap ← gap/2 ENDLOOP}; ptext: PROC [linepos: POINTER TO posbuf, nlines: pos, linebuf: POINTER TO charbuf, outfile: filedesc] = { -- ptext: output text lines from linebuf FOR i: pos IN [1..nlines] DO j: charpos ← linepos[i]; WHILE (linebuf[j] # ENDSTR) DO putcf[linebuf[j], outfile]; j ← j + 1 ENDLOOP; ENDLOOP}; IF (gtext[@linepos, @nlines, @linebuf, STDIN]) THEN { shell[@linepos, nlines, @linebuf]; ptext[@linepos, nlines, @linebuf, STDOUT]} ELSE error["sort: input too big to sort"L]}; -- sort: external sort of text lines sort: PROC = { MAXCHARS: NAT = 10000; -- maximum number of text chars MAXLINES: NAT = 300; -- maximum number of lines MERGEORDER: NAT = 5; charpos: TYPE = [1..MAXCHARS]; charbuf: TYPE = ARRAY [1..MAXCHARS] OF character; posbuf: TYPE = ARRAY [1..MAXLINES] OF charpos; pos: TYPE = [0..MAXLINES]; fdbuf: TYPE = ARRAY [1..MERGEORDER] OF filedesc; linebuf: charbuf; linepos: posbuf; nlines: pos; infile: fdbuf; outfile: filedesc; high, low, lim: INTEGER; done: BOOLEAN; name: string; cmp: PROC [i, j: charpos, linebuf: POINTER TO charbuf] RETURNS [INTEGER] = INLINE { -- cmp: compare linebuf[i] with linebuf[j] WHILE (linebuf[i] = linebuf[j]) AND (linebuf[i] # ENDSTR) DO i ← i + 1; j ← j + 1 ENDLOOP; RETURN [ IF (linebuf[i] = linebuf[j]) THEN 0 ELSE IF (linebuf[i] = ENDSTR) THEN -1 -- 1st is shorter ELSE IF (linebuf[j] = ENDSTR) THEN 1 -- 2nd is shorter ELSE IF (linebuf[i] < linebuf[j]) THEN -1 ELSE 1]}; exchange: PROC [lp1, lp2: POINTER TO charpos] = INLINE { -- exchange: exchange linebuf[lp1] with linebuf[lp2] temp: charpos ← lp1↑; lp1↑ ← lp2↑; lp2↑ ← temp}; gtext: PROC [linepos: POINTER TO posbuf, nlines: POINTER TO pos, linebuf: POINTER TO charbuf, infile: filedesc] RETURNS [BOOLEAN] = { -- gtext: get text lines into linebuf len: INTEGER; nextpos: charpos ← 1; temp: string; done: BOOLEAN; nlines↑ ← 0; DO done ← (~getline[@temp, infile, MAXSTR]); IF (~done) THEN { nlines↑ ← nlines↑ + 1; linepos[nlines↑] ← nextpos; len ← length[@temp]; FOR i: INTEGER IN [1..len] DO linebuf[nextpos+i-1] ← temp[i] ENDLOOP; linebuf[nextpos+len] ← ENDSTR; nextpos ← nextpos + len + 1}; -- 1 for ENDSTR IF (done) OR (nextpos >= MAXCHARS-MAXSTR) OR (nlines↑ >= MAXLINES) THEN EXIT ENDLOOP; RETURN [done]}; ptext: PROC [linepos: POINTER TO posbuf, nlines: pos, linebuf: POINTER TO charbuf, outfile: filedesc] = { -- ptext: output text lines from linebuf FOR i: pos IN [1..nlines] DO j: charpos ← linepos[i]; WHILE (linebuf[j] # ENDSTR) DO putcf[linebuf[j], outfile]; j ← j + 1 ENDLOOP; ENDLOOP}; quick: PROC [linepos: POINTER TO posbuf, nlines: pos, linebuf: POINTER TO charbuf] = { -- quick: quicksort for lines rquick: PROC [lo, hi: INTEGER] = { -- rquick: recursive quicksort IF (lo < hi) THEN { i: INTEGER ← lo; j: INTEGER ← hi; pivline: charpos ← linepos[j]; -- pivot line DO WHILE (i < j) AND (cmp[linepos[i], pivline, linebuf] <= 0) DO i ← i + 1 ENDLOOP; WHILE (j > i) AND (cmp[linepos[j], pivline, linebuf] >= 0) DO j ← j - 1 ENDLOOP; IF (i < j) THEN -- out of order pair exchange[@linepos[i], @linepos[j]]; IF (i >= j) THEN EXIT ENDLOOP; exchange[@linepos[i], @linepos[hi]]; -- move pivot to i IF (i - lo < hi - i) THEN { rquick[lo, i-1]; rquick[i+1, hi]} ELSE { rquick[i+1, hi]; rquick[lo, i-1]}}}; rquick[1, nlines]}; gname: PROC [n: INTEGER, name: POINTER TO string] = { -- gname: generate unique name for file id n setstring[name, "$stemp"L]; [] ← itoc[n, name, length[name]+1]}; makefile: PROC [n: INTEGER] RETURNS [filedesc] = { -- makefile: manke new file for number n name: string; gname[n, @name]; RETURN [mustcreate[@name, IOWRITE]]}; gopen: PROC [infile: POINTER TO fdbuf, f1, f2: INTEGER] = { -- gopen: open group of files f1 ... f2 name: string; FOR i: [1..MERGEORDER] IN [1 .. f2-f1+1] DO gname[f1+i-1, @name]; infile[i] ← mustopen[@name, IOREAD] ENDLOOP}; gremove: PROC [infile: POINTER TO fdbuf, f1, f2: INTEGER] = { -- gremove: remove group of files f1 ... f2 name: string; FOR i: [1..MERGEORDER] IN [1 .. f2-f1+1] DO close[infile[i]]; gname[f1+i-1, @name]; remove[@name]; ENDLOOP}; merge: PROC [infile: POINTER TO fdbuf, nf: INTEGER, outfile: filedesc] = { -- merge: merge infile[1] ... infile[nf] onto outfile j: INTEGER ← 0; lbp: charpos; temp: string; reheap: PROC [linepos: POINTER TO posbuf, nf: pos, linebuf: POINTER TO charbuf] = { -- reheap: put linebuf[linepos[1]] in proper place in heap i: INTEGER ← 1; j: INTEGER ← 2*i; WHILE (j <= nf) DO IF (j < nf) THEN -- find smaller child IF (cmp[linepos[j], linepos[j+1], linebuf] > 0) THEN j ← j + 1; IF (cmp[linepos[i], linepos[j], linebuf] <= 0) THEN i ← nf -- proper position found, terminate loop ELSE exchange[@linepos[i], @linepos[j]]; -- percolate i ← j; j ← 2 * i ENDLOOP}; sccopy: PROC [s: POINTER TO string, cb: POINTER TO charbuf, i: charpos] = { -- sccopy: copy string s to cb[i] j: INTEGER ← 1; WHILE (s[j] # ENDSTR) DO cb[i] ← s[j]; j ← j + 1; i ← i + 1 ENDLOOP; cb[i] ← ENDSTR}; cscopy: PROC [cb: POINTER TO charbuf, i: charpos, s: POINTER TO string] = { -- sccopy: copy cb[i] to string s j: INTEGER ← 1; WHILE (cb[i] # ENDSTR) DO s[j] ← cb[i]; i ← i + 1; j ← j + 1 ENDLOOP; s[j] ← ENDSTR}; FOR i: INTEGER IN [1..nf] DO -- get one line from each file IF (getline[@temp, infile[i], MAXSTR]) THEN { lbp ← (i-1)*MAXSTR + 1; -- room for longest sccopy[@temp, @linebuf, lbp]; linepos[i] ← lbp; j ← j + 1} ENDLOOP; nf ← j; quick[@linepos, nf, @linebuf]; -- make initial heap WHILE (nf > 0) DO i: INTEGER; lbp ← linepos[1]; -- lowest line cscopy[@linebuf, lbp, @temp]; putstr[@temp, outfile]; i ← lbp/MAXSTR + 1; -- compute file index IF (getline[@temp, infile[i], MAXSTR]) THEN sccopy[@temp, @linebuf, lbp] ELSE { -- one less input file linepos[1] ← linepos[nf]; nf ← nf - 1}; reheap[@linepos, nf, @linebuf] ENDLOOP}; high ← 0; DO -- initial formation of runs done ← gtext[@linepos, @nlines, @linebuf, STDIN]; quick[@linepos, nlines, @linebuf]; high ← high + 1; outfile ← makefile[high]; ptext[@linepos, nlines, @linebuf, outfile]; close[outfile]; IF done THEN EXIT ENDLOOP; low ← 1; WHILE (low < high) DO -- merge runs lim ← MIN[low+MERGEORDER-1, high]; gopen[@infile, low, lim]; high ← high + 1; outfile ← makefile[outfile]; merge[@infile, lim-low+1, outfile]; close[outfile]; gremove[@infile, low, lim]; low ← low + MERGEORDER ENDLOOP; gname[high, @name]; -- final cleanup outfile ← open[@name, IOREAD]; fcopy[outfile, STDOUT]; close[outfile]; remove[@name]}; -- unique: remove adjacent duplicate lines unique: PROC = { buf: ARRAY [0..1] OF string; cur: [0..1] ← 1; buf[1-cur][1] ← ENDSTR; WHILE (getline[@buf[cur], STDIN, MAXSTR]) DO IF (~equal[@buf[cur], @buf[1-cur]]) THEN { putstr[@buf[cur], STDOUT]; cur ← 1 - cur} ENDLOOP}; -- kwic: make keyword in context index kwic: PROC = { FOLD: character = ord['$]; buf: string; putrot: PROC [buf: POINTER TO string] = { -- putrot: create lines with keyword at front rotate: PROC [buf: POINTER TO string, n: INTEGER] = { -- rotate: output rotated line i: INTEGER ← n; WHILE (buf[i] # NEWLINE) AND (buf[i] # ENDSTR) DO putc[buf[i]]; i ← i + 1 ENDLOOP; putc[FOLD]; FOR i: INTEGER IN [1 .. n-1] DO putc[buf[i]] ENDLOOP; putc[NEWLINE]}; i: INTEGER ← 1; WHILE (buf[i] # NEWLINE) AND (buf[i] # ENDSTR) DO IF (isalphanum[buf[i]]) THEN { rotate[buf, i]; -- token starts at "i" i ← i + 1; WHILE (~isalphanum[buf[i]]) DO i ← i + 1 ENDLOOP}; i ← i + 1 ENDLOOP}; WHILE (getline[@buf, STDIN, MAXSTR]) DO putrot[@buf] ENDLOOP}; -- unrotate: unrotate lines rotated by kwic unrotate: PROC = { MAXOUT: NAT = 80; MIDDLE: NAT = 40; FOLD: character = ord['$]; inbuf, outbuf: string; WHILE (getline[@inbuf, STDIN, MAXSTR]) DO j, f: INTEGER; FOR i: INTEGER IN [1 .. MAXOUT) DO outbuf[i] ← BLANK ENDLOOP; f ← index[@inbuf, FOLD]; j ← MIDDLE - 1; FOR i: INTEGER DECREASING IN (f .. length[@inbuf]) DO outbuf[j] ← inbuf[i]; j ← j - 1; IF (j <= 0) THEN j ← MAXOUT - 1 ENDLOOP; j ← MIDDLE + 1; FOR i: INTEGER IN [1 .. f) DO outbuf[j] ← inbuf[i]; j ← j MOD (MAXOUT-1) + 1 ENDLOOP; FOR k: INTEGER IN [1 .. MAXOUT) DO IF (outbuf[k] # BLANK) THEN j ← k; ENDLOOP; outbuf[j+1] ← ENDSTR; putstr[@outbuf, STDOUT]; putc[NEWLINE] ENDLOOP}; }.