-- SoftwareTools.Mesa
-- last edited by Satterthwaite, 23-Sep-81 12:21:52
SoftwareTools: PROGRAM = {
-- global definitions
-- characters and strings
MAXCHAR: NAT = LAST[CHARACTER] - 0c;
MAXSTR: NAT = 100;
character: TYPE = [-1..MAXCHAR];
string: TYPE = PACKED ARRAY [1..MAXSTR] OF character;
ENDFILE: character = -1;
ENDSTR: character = 0;
BACKSPACE: character = 9;
TAB: character = 9;
NEWLINE: character = 10;
BLANK: character = 32;
ESCAPE: character = ord['@];
-- file descriptors and io-related stuff
MAXOPEN: NAT = 10;
filedesc: TYPE = [0..MAXOPEN];
IOERROR: filedesc = 0;
STDIN: filedesc = 1;
STDOUT: filedesc = 2;
STDERR: filedesc = 3;
IOREAD: INTEGER = 1;
IOWRITE: INTEGER = 2;
IOBOTH: INTEGER = 3;
-- standard (Pascal) procedures
ord: PROC [c: CHARACTER] RETURNS [character] = INLINE {RETURN [c - 0c]};
-- primitives
error: PROC [STRING];
message: PROC [STRING];
setstring: PROC [s: POINTER TO string, text: STRING];
open: PROC [name: POINTER TO string, mode: INTEGER] RETURNS [filedesc];
create: PROC [name: POINTER TO string, mode: INTEGER] RETURNS [filedesc];
close: PROC [fd: filedesc];
remove: PROC [s: POINTER TO string];
getc: PROC [c: POINTER TO character] RETURNS [character];
getcf: PROC [c: POINTER TO character, fd: filedesc] RETURNS [character];
getline: PROC [s: POINTER TO string, fd: filedesc, maxsize: INTEGER] RETURNS [BOOLEAN];
putc: PROC [c: character];
putcf: PROC [c: character, fd: filedesc];
putstr: PROC [s: POINTER TO string, fd: filedesc];
getarg: PROC [n: INTEGER, str: POINTER TO string, maxsize: INTEGER] RETURNS [BOOLEAN];
nargs: PROC RETURNS [INTEGER];
-- utilities
-- addstr: put c in outset[j] if it fits, increment j
addstr: PROC [c: character, outset: POINTER TO string,
j: POINTER TO INTEGER, maxset: INTEGER] RETURNS [added: BOOLEAN] = {
IF (j↑ > maxset) THEN
added ← FALSE
ELSE {
outset[j↑] ← c;
j↑ ← j↑ + 1;
added ← TRUE};
RETURN};
-- equal: test two strings for equality
equal: PROC [str1, str2: POINTER TO string] RETURNS [BOOLEAN] = {
i: INTEGER ← 1;
WHILE (str1[i] = str2[i]) AND (str1[i] # ENDSTR) DO
i ← i + 1 ENDLOOP;
RETURN [str1[i] = str2[i]]};
-- esc: map s[i↑] into escaped character, increment i↑
esc: PROC [s: POINTER TO string, i: POINTER TO INTEGER] RETURNS [c: character] = {
IF (s[i↑] # ESCAPE) THEN
c ← s[i↑]
ELSE IF (s[i↑+1] = ENDSTR) THEN -- @ not special at end
c ← ESCAPE
ELSE {
i↑ ← i↑ + 1;
IF (s[i↑] = ord['n]) THEN
c ← NEWLINE
ELSE IF (s[i↑] = ord['t]) THEN
c ← TAB
ELSE
c ← s[i↑]};
RETURN};
-- index: find position of character c in string s
index: PROC [s: POINTER TO string, c: character] RETURNS [INTEGER] = {
i: INTEGER ← 1;
WHILE (s[i] # c) AND (s[i] # ENDSTR) DO
i ← i + 1 ENDLOOP;
RETURN [IF (s[i] = ENDSTR) THEN 0 ELSE i]};
-- isalphanum: true if c is a letter or digit
isalphanum: PROC [c: character] RETURNS [BOOLEAN] = {
RETURN [
(c IN [ord['a] .. ord['z]])
OR (c IN [ord['A] .. ord['Z]])
OR (c IN [ord['0] .. ord['9]])]};
-- isdigit: true if c is a digit
isdigit: PROC [c: character] RETURNS [BOOLEAN] = INLINE {
RETURN [c IN [ord['0] .. ord['9]]]};
-- isupper: true if c is an upper case letter
isupper: PROC [c: character] RETURNS [BOOLEAN] = INLINE {
RETURN [c IN [ord['A] .. ord['Z]]]};
-- itoc: convert integer n to char string in s[i] ...
itoc: PROC [n: INTEGER, s: POINTER TO string, i: INTEGER] RETURNS [next: INTEGER] = {
IF (n < 0) THEN {
s[i] ← ord['-];
next ← itoc[-n, s, i+1]}
ELSE {
IF (n >= 10) THEN
i ← itoc[n/10, s, i];
s[i] ← n MOD 10 + ord['0];
s[i+1] ← ENDSTR;
next ← i+1};
RETURN};
-- length: compute length of string
length: PROC [s: POINTER TO string] RETURNS [INTEGER] = {
n: INTEGER ← 1;
WHILE (s[n] # ENDSTR) DO
n ← n + 1 ENDLOOP;
RETURN [n - 1]};
-- mustcreate: create file or die
mustcreate: PROC [name: POINTER TO string, mode: INTEGER] RETURNS [fd: filedesc] = {
fd ← create[name, mode];
IF (fd = IOERROR) THEN {
putstr[name, STDERR];
error[": can't create file"L]};
RETURN};
-- mustopen: open file or die
mustopen: PROC [name: POINTER TO string, mode: INTEGER] RETURNS [fd: filedesc] = {
fd ← open[name, mode];
IF (fd = IOERROR) THEN {
putstr[name, STDERR];
error[": can't open file"L]};
RETURN};
-- scopy: copy string ar st src[i] to dest[j]
scopy: PROC [src: POINTER TO string, i: INTEGER,
dest: POINTER TO string, j: INTEGER] = {
WHILE (src[i] # ENDSTR) DO
dest[j] ← src[i];
i ← i + 1;
j ← j + 1;
ENDLOOP;
dest[j] ← ENDSTR};
-- ctoi: convert string at s[i] to integer, increment i
ctoi: PROC [s: POINTER TO string, i: POINTER TO INTEGER] RETURNS [INTEGER] = {
n, sign: INTEGER;
WHILE (s[i↑] = BLANK) OR (s[i↑] = TAB) DO
i↑ ← i↑ + 1 ENDLOOP;
sign ← IF s[i↑] = ord['-] THEN -1 ELSE 1;
IF (s[i↑] = ord['+]) OR (s[i↑] = ord['-]) THEN
i↑ ← i↑ + 1;
n ← 0;
WHILE (isdigit[s[i↑]]) DO
n ← 10*n + (s[i↑] - ord['0]);
i↑ ← i↑ + 1;
ENDLOOP;
RETURN [sign * n]};
-- fcopy: copy file fin to fout
fcopy: PROC [fin, fout: filedesc] = {
c: character;
WHILE (getcf[@c, fin] # ENDFILE) DO
putcf[c, fout] ENDLOOP};
-- putdec: put decimal integer n in field width >= w
putdec: PROC [n, w: INTEGER] = {
s: string;
nd: INTEGER = itoc[n, @s, 1];
FOR i: INTEGER IN [nd .. w] DO
putc[BLANK] ENDLOOP;
FOR i: INTEGER IN [1 .. nd-1] DO
putc[s[i]] ENDLOOP};
-- tools
-- copy: copy standard input to standard output
copy: PROC = {
c: character;
WHILE (getc[@c] # ENDFILE) DO
putc[c] ENDLOOP};
-- charcount: counts characters in standard input
charcount: PROC = {
nc: INTEGER ← 0;
c: character;
WHILE (getc[@c] # ENDFILE) DO
nc ← nc + 1 ENDLOOP;
putdec[nc, 1];
putc[NEWLINE]};
-- linecount: counts lines in standard input
linecount: PROC = {
nl: INTEGER ← 0;
c: character;
WHILE (getc[@c] # ENDFILE) DO
IF (c = NEWLINE) THEN nl ← nl + 1;
ENDLOOP;
putdec[nl, 1];
putc[NEWLINE]};
-- wordcount: count words in standard input
wordcount: PROC = {
nw: INTEGER ← 0;
inword: BOOLEAN ← FALSE;
c: character;
WHILE (getc[@c] # ENDFILE) DO
IF (c = BLANK) OR (c = NEWLINE) OR (c = TAB) THEN
inword ← FALSE
ELSE IF (~inword) THEN {
inword ← TRUE;
nw ← nw + 1}
ENDLOOP;
putdec[nw, 1];
putc[NEWLINE]};
-- shared definitions
MAXLINE: NAT = 1000;
tabtype: TYPE = PACKED ARRAY [1..MAXLINE] OF BOOLEAN;
TABSPACE: NAT = 8;
tabpos: PROC [col: INTEGER, tabstops: POINTER TO tabtype] RETURNS [BOOLEAN] = {
-- return true if col is a tab stop
RETURN [IF (col > MAXLINE) THEN TRUE ELSE tabstops[col]]};
settabs: PROC [tabstops: POINTER TO tabtype] = {
-- set initial tab stops
FOR i: INTEGER IN [1 .. MAXLINE] DO
tabstops[i] ← (i MOD TABSPACE = 1) ENDLOOP};
-- detab: convert tabs to equivalent number of blanks
detab: PROC = {
c: character;
col: INTEGER ← 1;
tabstops: tabtype;
settabs[@tabstops]; -- set initial tab stops
WHILE (getc[@c] # ENDFILE) DO
IF (c = TAB) THEN
DO
putc[BLANK];
col ← col + 1;
IF tabpos[col, @tabstops] THEN EXIT;
ENDLOOP
ELSE IF (c = NEWLINE) THEN {
putc[NEWLINE];
col ← 1}
ELSE {
putc[c];
col ← col + 1}
ENDLOOP};
-- entab: replace blanks by tabs and blanks
entab: PROC = {
col: INTEGER ← 1;
tabstops: tabtype;
settabs[@tabstops];
DO
newcol: INTEGER ← col;
c: character;
WHILE (getc[@c] = BLANK) DO -- collect blanks
newcol ← newcol + 1;
IF (tabpos[newcol, @tabstops]) THEN {
putc[TAB];
col ← newcol};
ENDLOOP;
WHILE (col < newcol) DO -- output leftover blanks
putc[BLANK];
col ← col + 1;
ENDLOOP;
IF (c # ENDFILE) THEN {
putc[c];
IF (c = NEWLINE) THEN
col ← 1
ELSE
col ← col + 1};
IF (c = ENDFILE) THEN EXIT;
ENDLOOP};
-- overstrike: convert backspaces into multiple lines
overstrike: PROC = {
SKIP: character = BLANK;
NOSKIP: character = ord['+];
c: character;
col: INTEGER ← 1;
DO
newcol: INTEGER ← col;
WHILE (getc[@c] = BACKSPACE) DO -- eat backspaces
newcol ← MAX[newcol-1, 1] ENDLOOP;
IF (newcol < col) THEN {
putc[NEWLINE]; -- start overstrike line
putc[NOSKIP];
FOR i: INTEGER IN [1..newcol) DO
putc[BLANK] ENDLOOP;
col ← newcol}
ELSE IF (col = 1) AND (c # ENDFILE) THEN
putc[SKIP]; -- normal line
-- ELSE middle of line
IF (c = ENDFILE) THEN EXIT;
putc[c]; -- normal character
col ← IF (c = NEWLINE) THEN 1 ELSE col + 1
ENDLOOP};
WARNING: character = ord['~];
-- compress: compress standard input
compress: PROC = {
c, lastc: character;
n: INTEGER;
putrep: PROC [n: INTEGER, c: character] = {
-- put out representation of run of n 'c's
MAXREP: NAT = 26; -- assuming ['A..'Z]
THRESH: NAT = 4;
WHILE (n >= THRESH) OR ((c = WARNING) AND (n > 0)) DO
putc[WARNING];
putc[MIN[n, MAXREP] - 1 + ord['A]];
putc[c];
n ← n - MAXREP
ENDLOOP;
FOR n DECREASING IN [1..n] DO
putc[c] ENDLOOP};
n ← 1;
lastc ← getc[@lastc];
WHILE (lastc # ENDFILE) DO
IF (getc[@c] = ENDFILE) THEN {
IF (n > 1) OR (lastc = WARNING) THEN
putrep[n, lastc]
ELSE
putc[lastc]}
ELSE IF (c = lastc) THEN
n ← n + 1
ELSE IF (n > 1) OR (lastc = WARNING) THEN {
putrep[n, lastc];
n ← 1}
ELSE
putc[lastc];
lastc ← c
ENDLOOP};
-- expand: uncompress standard input
expand: PROC = {
c: character;
n: INTEGER;
WHILE (getc[@c] # ENDFILE) DO
IF (c # WARNING) THEN
putc[c]
ELSE IF (isupper[getc[@c]]) THEN {
n ← c - ord['A] + 1;
IF (getc[@c] # ENDFILE) THEN
FOR n DECREASING IN [1..n] DO
putc[c] ENDLOOP
ELSE {
putc[WARNING];
putc[n - 1 + ord['A]]}}
ELSE {
putc[WARNING];
IF (c # ENDFILE) THEN
putc[c]}
ENDLOOP};
-- echo: echo command line arguments to output
echo: PROC = {
argstr: string;
i: INTEGER ← 1;
WHILE (getarg[i, @argstr, MAXSTR]) DO
IF (i > 1) THEN
putc[BLANK];
FOR j: INTEGER IN [1 .. length[@argstr]] DO
putc[argstr[j]] ENDLOOP;
i ← i + 1
ENDLOOP;
IF (i > 1) THEN putc[NEWLINE]};
-- translit: map characters
translit: PROC = {
NEGATE: character = ord['↑];
arg, fromset, toset: string;
c: character;
i, lastto: [0..MAXSTR];
allbut, squash: BOOLEAN;
makeset: PROC [inset: POINTER TO string, k: INTEGER,
outset: POINTER TO string, maxset: INTEGER] RETURNS [BOOLEAN] = {
-- makeset: make set from inset[k] in outset
dodash: PROC [delim: character, src: POINTER TO string,
i: POINTER TO INTEGER, dest: POINTER TO string,
j: POINTER TO INTEGER, maxset: INTEGER] = {
-- dodash: expand set at src[i↑] into dest[j↑], stop at delim
DASH: character = ord['-];
WHILE (src[i↑] # delim) AND (src[i↑] # ENDSTR) DO
IF (src[i↑] = ESCAPE) THEN
[] ← addstr[esc[src, i], dest, j, maxset]
ELSE IF (src[i↑] # DASH) THEN
[] ← addstr[src[i↑], dest, j, maxset]
ELSE IF (j↑ <= 1) OR (src[i↑+1] = ENDSTR) THEN
[] ← addstr[DASH, dest, j, maxset] -- literal -
ELSE IF (isalphanum[src[i↑-1]])
AND (isalphanum[src[i↑+1]])
AND (src[i↑-1] <= src[i↑+1]) THEN {
FOR k: character IN (src[i↑-1] .. src[i↑+1]] DO
[] ← addstr[k, dest, j, maxset] ENDLOOP;
i↑ ← i↑ + 1}
ELSE
[] ← addstr[DASH, dest, j, maxset];
i↑ ← i↑ + 1
ENDLOOP};
j: INTEGER ← 1;
dodash[ENDSTR, inset, @k, outset, @j, maxset];
RETURN [addstr[ENDSTR, outset, @j, maxset]]};
xindex: PROC [inset: POINTER TO string, c: character,
allbut: BOOLEAN, lastto: INTEGER] RETURNS [INTEGER] = {
-- xindex: conditionally invert value from index
RETURN [
IF (c = ENDFILE) THEN 0
ELSE IF (~allbut) THEN index[inset, c]
ELSE IF (index[inset, c] > 0) THEN 0
ELSE lastto + 1]};
IF (~getarg[1, @arg, MAXSTR]) THEN
error["usage: translit from to"L];
allbut ← (arg[1] = NEGATE);
i ← IF (allbut) THEN 2 ELSE 1;
IF (~makeset[@arg, i, @fromset, MAXSTR]) THEN
error["translit: ""from"" set too large"L];
IF (~getarg[2, @arg, MAXSTR]) THEN
toset[1] ← ENDSTR
ELSE IF (~makeset[@arg, 1, @toset, MAXSTR]) THEN
error["translit: ""to"" set too large"L]
ELSE IF (length[@fromset] < length[@toset]) THEN
error["translit: ""from"" shorter than ""to"""L];
lastto ← length[@toset];
squash ← (length[@fromset] > lastto) OR (allbut);
DO
i ← xindex[@fromset, getc[@c], allbut, lastto];
IF (squash) AND (i>=lastto) AND (lastto>0) THEN {
putc[toset[lastto]];
DO
i ← xindex[@fromset, getc[@c], allbut, lastto];
IF (i < lastto) THEN EXIT;
ENDLOOP};
IF (c = ENDFILE) THEN EXIT;
IF (i > 0) AND (lastto > 0) THEN -- translate
putc[toset[i]]
ELSE IF (i = 0) THEN -- copy
putc[c]
-- ELSE delete --
ENDLOOP};
-- compare: compare two files for equality
compare: PROC = {
line1, line2: string;
arg1, arg2: string;
lineno: INTEGER ← 0;
infile1, infile2: filedesc;
f1, f2: BOOLEAN;
diffmsg: PROC [n: INTEGER, line1, line2: POINTER TO string] = {
-- diffmsg: print line numbers and differing lines
putdec[n, 1];
putc[ord[':]];
putc[NEWLINE];
putstr[line1, STDOUT];
putstr[line2, STDOUT]};
IF (~getarg[1, @arg1, MAXSTR])
OR (~getarg[2, @arg2, MAXSTR]) THEN
error["usage: compare file1 file2"L];
infile1 ← mustopen[@arg1, IOREAD];
infile2 ← mustopen[@arg2, IOREAD];
DO
lineno ← lineno + 1;
f1 ← getline[@line1, infile1, MAXSTR];
f2 ← getline[@line2, infile2, MAXSTR];
IF (~f1 OR ~f2) THEN EXIT;
IF (~equal[@line1, @line2]) THEN
diffmsg[lineno, @line1, @line2]
ENDLOOP;
IF (f2 AND ~f1) THEN
message["compare: end of file on file1"L]
ELSE IF (f1 AND ~f2) THEN
message["compare: end of file on file2"L]};
-- include: replace #include "file" by contents of file
include: PROC = {
incl: string; -- value is '#include'
finclude: PROC [f: filedesc] = {
-- finclude: include file desc f
line, str: string;
loc, i: INTEGER;
f1: filedesc;
getword: PROC [s: POINTER TO string, i: INTEGER,
out: POINTER TO string] RETURNS [INTEGER] = {
j: INTEGER ← 1;
WHILE (s[i] = BLANK) OR (s[i] = TAB) OR (s[i] = NEWLINE) DO
i ← i + 1 ENDLOOP;
WHILE (~((s[i] = ENDSTR)
OR (s[i] = BLANK) OR (s[i] = TAB) OR (s[i] = NEWLINE))) DO
out[j] ← s[i];
i ← i + 1;
j ← j + 1
ENDLOOP;
out[j] ← ENDSTR;
RETURN[IF (s[i] = ENDSTR) THEN 0 ELSE i]};
WHILE (getline[@line, f, MAXSTR]) DO
loc ← getword[@line, 1, @str];
IF (~equal[@str, @incl]) THEN
putstr[@line, STDOUT]
ELSE {
loc ← getword[@line, loc, @str];
str[length[@str]] ← ENDSTR; -- remove quotes
FOR i IN [1 .. length[@str]] DO
str[i] ← str[i+1] ENDLOOP;
f1 ← mustopen[@str, IOREAD];
finclude[f1];
close[f1]}
ENDLOOP};
setstring[@incl, "#include"L];
finclude[STDIN]};
-- concat: concatenate files onto standard input
concat: PROC = {
fd: filedesc;
s: string;
FOR i: INTEGER IN [1 .. nargs[]] DO
[] ← getarg[i, @s, MAXSTR];
fd ← mustopen[@s, IOREAD];
fcopy[fd, STDOUT];
close[fd]
ENDLOOP};
-- print: (default input STDIN) print files with headings
print: PROC = {
name: string;
null: string; -- value ''
fin: filedesc;
fprint: PROC [name: POINTER TO string, fin: filedesc] = {
-- fprint: print file "name" from fin
MARGIN1: NAT = 2;
MARGIN2: NAT = 2;
BOTTOM: NAT = 64;
PAGELEN: NAT = 66;
line: string;
lineno, pageno: INTEGER;
skip: PROC [n: INTEGER] = {
-- skip: output n blank lines
FOR i: INTEGER IN [1..n] DO
putc[NEWLINE] ENDLOOP};
head: PROC [name: POINTER TO string, pageno: INTEGER] = {
-- head: print top of page header
page: string; -- set to ' Page '
setstring[@page, " Page "L];
putstr[name, STDOUT];
putstr[@page, STDOUT];
putdec[pageno, 1];
putc[NEWLINE]};
pageno ← 1;
skip[MARGIN1];
head[name, pageno];
skip[MARGIN2];
lineno ← MARGIN1 + MARGIN2 + 1;
WHILE (getline[@line, fin, MAXSTR]) DO
IF (lineno = 0) THEN {
skip[MARGIN1];
pageno ← pageno + 1;
head[name, pageno];
skip[MARGIN2];
lineno ← MARGIN1 + MARGIN2 + 1};
putstr[@line, STDOUT];
lineno ← lineno + 1;
IF (lineno >= BOTTOM) THEN {
skip[PAGELEN-lineno];
lineno ← 0}
ENDLOOP;
IF (lineno > 0) THEN
skip[PAGELEN-lineno]};
setstring[@null, ""L];
IF (nargs[] = 0) THEN
fprint[@null, STDIN]
ELSE
FOR i: INTEGER IN [1 .. nargs[]] DO
[] ← getarg[i, @name, MAXSTR];
fin ← mustopen[@name, IOREAD];
fprint[@name, fin];
close[fin]
ENDLOOP};
-- makecopy: copy one file to another
makecopy: PROC = {
inname, outname: string;
fin, fout: filedesc;
IF (~getarg[1, @inname, MAXSTR])
OR (~getarg[2, @outname, MAXSTR]) THEN
error["usage: makecopy old new"L];
fin ← mustopen[@inname, IOREAD];
fout ← mustcreate[@outname, IOWRITE];
fcopy[fin, fout];
close[fin];
close[fout]};
-- sort: sort text lines in memory
inmemsort: PROC = {
MAXCHARS: NAT = 10000; -- maximum # of text characters
MAXLINES: NAT = 300; -- maximum # of lines
charbuf: TYPE = ARRAY [1..MAXCHARS] OF character;
charpos: TYPE = [1..MAXCHARS];
posbuf: TYPE = ARRAY [1..MAXLINES] OF charpos;
pos: TYPE = [0..MAXLINES];
linebuf: charbuf;
linepos: posbuf;
nlines: pos;
gtext: PROC [linepos: POINTER TO posbuf, nlines: POINTER TO pos,
linebuf: POINTER TO charbuf, infile: filedesc] RETURNS [BOOLEAN] = {
-- gtext: get text lines into linebuf
len: INTEGER;
nextpos: charpos ← 1;
temp: string;
done: BOOLEAN;
nlines↑ ← 0;
DO
done ← (~getline[@temp, infile, MAXSTR]);
IF (~done) THEN {
nlines↑ ← nlines↑ + 1;
linepos[nlines↑] ← nextpos;
len ← length[@temp];
FOR i: INTEGER IN [1..len] DO
linebuf[nextpos+i-1] ← temp[i] ENDLOOP;
linebuf[nextpos+len] ← ENDSTR;
nextpos ← nextpos + len + 1}; -- 1 for ENDSTR
IF (done) OR (nextpos >= MAXCHARS-MAXSTR)
OR (nlines↑ >= MAXLINES) THEN EXIT
ENDLOOP;
RETURN [done]};
shell: PROC [linepos: POINTER TO posbuf, nlines: INTEGER,
linebuf: POINTER TO charbuf] = {
-- shell: ascending Shell sort for lines
gap: INTEGER ← nlines/2;
cmp: PROC [i, j: charpos, linebuf: POINTER TO charbuf]
RETURNS [INTEGER] = INLINE {
-- cmp: compare linebuf[i] with linebuf[j]
WHILE (linebuf[i] = linebuf[j])
AND (linebuf[i] # ENDSTR) DO
i ← i + 1;
j ← j + 1
ENDLOOP;
RETURN [
IF (linebuf[i] = linebuf[j]) THEN 0
ELSE IF (linebuf[i] = ENDSTR) THEN -1 -- 1st is shorter
ELSE IF (linebuf[j] = ENDSTR) THEN 1 -- 2nd is shorter
ELSE IF (linebuf[i] < linebuf[j]) THEN -1
ELSE 1]};
exchange: PROC [lp1, lp2: POINTER TO charpos] = INLINE {
-- exchange: exchange linebuf[lp1] with linebuf[lp2]
temp: charpos ← lp1↑;
lp1↑ ← lp2↑;
lp2↑ ← temp};
WHILE (gap > 0) DO
FOR i: INTEGER IN [gap+1 .. nlines] DO
j: INTEGER ← i - gap;
WHILE (j > 0) DO
jg: INTEGER ← j + gap;
IF (cmp[linepos[j], linepos[jg], linebuf] <= 0) THEN
j ← 0 -- force loop termination
ELSE
exchange[@linepos[j], @linepos[jg]];
j ← j - gap
ENDLOOP;
ENDLOOP;
gap ← gap/2
ENDLOOP};
ptext: PROC [linepos: POINTER TO posbuf, nlines: pos,
linebuf: POINTER TO charbuf, outfile: filedesc] = {
-- ptext: output text lines from linebuf
FOR i: pos IN [1..nlines] DO
j: charpos ← linepos[i];
WHILE (linebuf[j] # ENDSTR) DO
putcf[linebuf[j], outfile];
j ← j + 1
ENDLOOP;
ENDLOOP};
IF (gtext[@linepos, @nlines, @linebuf, STDIN]) THEN {
shell[@linepos, nlines, @linebuf];
ptext[@linepos, nlines, @linebuf, STDOUT]}
ELSE
error["sort: input too big to sort"L]};
-- sort: external sort of text lines
sort: PROC = {
MAXCHARS: NAT = 10000; -- maximum number of text chars
MAXLINES: NAT = 300; -- maximum number of lines
MERGEORDER: NAT = 5;
charpos: TYPE = [1..MAXCHARS];
charbuf: TYPE = ARRAY [1..MAXCHARS] OF character;
posbuf: TYPE = ARRAY [1..MAXLINES] OF charpos;
pos: TYPE = [0..MAXLINES];
fdbuf: TYPE = ARRAY [1..MERGEORDER] OF filedesc;
linebuf: charbuf;
linepos: posbuf;
nlines: pos;
infile: fdbuf;
outfile: filedesc;
high, low, lim: INTEGER;
done: BOOLEAN;
name: string;
cmp: PROC [i, j: charpos, linebuf: POINTER TO charbuf]
RETURNS [INTEGER] = INLINE {
-- cmp: compare linebuf[i] with linebuf[j]
WHILE (linebuf[i] = linebuf[j])
AND (linebuf[i] # ENDSTR) DO
i ← i + 1;
j ← j + 1
ENDLOOP;
RETURN [
IF (linebuf[i] = linebuf[j]) THEN 0
ELSE IF (linebuf[i] = ENDSTR) THEN -1 -- 1st is shorter
ELSE IF (linebuf[j] = ENDSTR) THEN 1 -- 2nd is shorter
ELSE IF (linebuf[i] < linebuf[j]) THEN -1
ELSE 1]};
exchange: PROC [lp1, lp2: POINTER TO charpos] = INLINE {
-- exchange: exchange linebuf[lp1] with linebuf[lp2]
temp: charpos ← lp1↑;
lp1↑ ← lp2↑;
lp2↑ ← temp};
gtext: PROC [linepos: POINTER TO posbuf, nlines: POINTER TO pos,
linebuf: POINTER TO charbuf, infile: filedesc] RETURNS [BOOLEAN] = {
-- gtext: get text lines into linebuf
len: INTEGER;
nextpos: charpos ← 1;
temp: string;
done: BOOLEAN;
nlines↑ ← 0;
DO
done ← (~getline[@temp, infile, MAXSTR]);
IF (~done) THEN {
nlines↑ ← nlines↑ + 1;
linepos[nlines↑] ← nextpos;
len ← length[@temp];
FOR i: INTEGER IN [1..len] DO
linebuf[nextpos+i-1] ← temp[i] ENDLOOP;
linebuf[nextpos+len] ← ENDSTR;
nextpos ← nextpos + len + 1}; -- 1 for ENDSTR
IF (done) OR (nextpos >= MAXCHARS-MAXSTR)
OR (nlines↑ >= MAXLINES) THEN EXIT
ENDLOOP;
RETURN [done]};
ptext: PROC [linepos: POINTER TO posbuf, nlines: pos,
linebuf: POINTER TO charbuf, outfile: filedesc] = {
-- ptext: output text lines from linebuf
FOR i: pos IN [1..nlines] DO
j: charpos ← linepos[i];
WHILE (linebuf[j] # ENDSTR) DO
putcf[linebuf[j], outfile];
j ← j + 1
ENDLOOP;
ENDLOOP};
quick: PROC [linepos: POINTER TO posbuf, nlines: pos,
linebuf: POINTER TO charbuf] = {
-- quick: quicksort for lines
rquick: PROC [lo, hi: INTEGER] = {
-- rquick: recursive quicksort
IF (lo < hi) THEN {
i: INTEGER ← lo;
j: INTEGER ← hi;
pivline: charpos ← linepos[j]; -- pivot line
DO
WHILE (i < j)
AND (cmp[linepos[i], pivline, linebuf] <= 0) DO
i ← i + 1 ENDLOOP;
WHILE (j > i)
AND (cmp[linepos[j], pivline, linebuf] >= 0) DO
j ← j - 1 ENDLOOP;
IF (i < j) THEN -- out of order pair
exchange[@linepos[i], @linepos[j]];
IF (i >= j) THEN EXIT
ENDLOOP;
exchange[@linepos[i], @linepos[hi]]; -- move pivot to i
IF (i - lo < hi - i) THEN {
rquick[lo, i-1];
rquick[i+1, hi]}
ELSE {
rquick[i+1, hi];
rquick[lo, i-1]}}};
rquick[1, nlines]};
gname: PROC [n: INTEGER, name: POINTER TO string] = {
-- gname: generate unique name for file id n
setstring[name, "$stemp"L];
[] ← itoc[n, name, length[name]+1]};
makefile: PROC [n: INTEGER] RETURNS [filedesc] = {
-- makefile: manke new file for number n
name: string;
gname[n, @name];
RETURN [mustcreate[@name, IOWRITE]]};
gopen: PROC [infile: POINTER TO fdbuf, f1, f2: INTEGER] = {
-- gopen: open group of files f1 ... f2
name: string;
FOR i: [1..MERGEORDER] IN [1 .. f2-f1+1] DO
gname[f1+i-1, @name];
infile[i] ← mustopen[@name, IOREAD]
ENDLOOP};
gremove: PROC [infile: POINTER TO fdbuf, f1, f2: INTEGER] = {
-- gremove: remove group of files f1 ... f2
name: string;
FOR i: [1..MERGEORDER] IN [1 .. f2-f1+1] DO
close[infile[i]];
gname[f1+i-1, @name];
remove[@name];
ENDLOOP};
merge: PROC [infile: POINTER TO fdbuf, nf: INTEGER,
outfile: filedesc] = {
-- merge: merge infile[1] ... infile[nf] onto outfile
j: INTEGER ← 0;
lbp: charpos;
temp: string;
reheap: PROC [linepos: POINTER TO posbuf, nf: pos,
linebuf: POINTER TO charbuf] = {
-- reheap: put linebuf[linepos[1]] in proper place in heap
i: INTEGER ← 1;
j: INTEGER ← 2*i;
WHILE (j <= nf) DO
IF (j < nf) THEN -- find smaller child
IF (cmp[linepos[j], linepos[j+1], linebuf] > 0) THEN
j ← j + 1;
IF (cmp[linepos[i], linepos[j], linebuf] <= 0) THEN
i ← nf -- proper position found, terminate loop
ELSE
exchange[@linepos[i], @linepos[j]]; -- percolate
i ← j;
j ← 2 * i
ENDLOOP};
sccopy: PROC [s: POINTER TO string, cb: POINTER TO charbuf,
i: charpos] = {
-- sccopy: copy string s to cb[i]
j: INTEGER ← 1;
WHILE (s[j] # ENDSTR) DO
cb[i] ← s[j];
j ← j + 1;
i ← i + 1
ENDLOOP;
cb[i] ← ENDSTR};
cscopy: PROC [cb: POINTER TO charbuf, i: charpos,
s: POINTER TO string] = {
-- sccopy: copy cb[i] to string s
j: INTEGER ← 1;
WHILE (cb[i] # ENDSTR) DO
s[j] ← cb[i];
i ← i + 1;
j ← j + 1
ENDLOOP;
s[j] ← ENDSTR};
FOR i: INTEGER IN [1..nf] DO -- get one line from each file
IF (getline[@temp, infile[i], MAXSTR]) THEN {
lbp ← (i-1)*MAXSTR + 1; -- room for longest
sccopy[@temp, @linebuf, lbp];
linepos[i] ← lbp;
j ← j + 1}
ENDLOOP;
nf ← j;
quick[@linepos, nf, @linebuf]; -- make initial heap
WHILE (nf > 0) DO
i: INTEGER;
lbp ← linepos[1]; -- lowest line
cscopy[@linebuf, lbp, @temp];
putstr[@temp, outfile];
i ← lbp/MAXSTR + 1; -- compute file index
IF (getline[@temp, infile[i], MAXSTR]) THEN
sccopy[@temp, @linebuf, lbp]
ELSE { -- one less input file
linepos[1] ← linepos[nf];
nf ← nf - 1};
reheap[@linepos, nf, @linebuf]
ENDLOOP};
high ← 0;
DO -- initial formation of runs
done ← gtext[@linepos, @nlines, @linebuf, STDIN];
quick[@linepos, nlines, @linebuf];
high ← high + 1;
outfile ← makefile[high];
ptext[@linepos, nlines, @linebuf, outfile];
close[outfile];
IF done THEN EXIT
ENDLOOP;
low ← 1;
WHILE (low < high) DO -- merge runs
lim ← MIN[low+MERGEORDER-1, high];
gopen[@infile, low, lim];
high ← high + 1;
outfile ← makefile[outfile];
merge[@infile, lim-low+1, outfile];
close[outfile];
gremove[@infile, low, lim];
low ← low + MERGEORDER
ENDLOOP;
gname[high, @name]; -- final cleanup
outfile ← open[@name, IOREAD];
fcopy[outfile, STDOUT];
close[outfile];
remove[@name]};
-- unique: remove adjacent duplicate lines
unique: PROC = {
buf: ARRAY [0..1] OF string;
cur: [0..1] ← 1;
buf[1-cur][1] ← ENDSTR;
WHILE (getline[@buf[cur], STDIN, MAXSTR]) DO
IF (~equal[@buf[cur], @buf[1-cur]]) THEN {
putstr[@buf[cur], STDOUT];
cur ← 1 - cur}
ENDLOOP};
-- kwic: make keyword in context index
kwic: PROC = {
FOLD: character = ord['$];
buf: string;
putrot: PROC [buf: POINTER TO string] = {
-- putrot: create lines with keyword at front
rotate: PROC [buf: POINTER TO string, n: INTEGER] = {
-- rotate: output rotated line
i: INTEGER ← n;
WHILE (buf[i] # NEWLINE) AND (buf[i] # ENDSTR) DO
putc[buf[i]];
i ← i + 1
ENDLOOP;
putc[FOLD];
FOR i: INTEGER IN [1 .. n-1] DO
putc[buf[i]] ENDLOOP;
putc[NEWLINE]};
i: INTEGER ← 1;
WHILE (buf[i] # NEWLINE) AND (buf[i] # ENDSTR) DO
IF (isalphanum[buf[i]]) THEN {
rotate[buf, i]; -- token starts at "i"
i ← i + 1;
WHILE (~isalphanum[buf[i]]) DO
i ← i + 1 ENDLOOP};
i ← i + 1
ENDLOOP};
WHILE (getline[@buf, STDIN, MAXSTR]) DO
putrot[@buf] ENDLOOP};
-- unrotate: unrotate lines rotated by kwic
unrotate: PROC = {
MAXOUT: NAT = 80;
MIDDLE: NAT = 40;
FOLD: character = ord['$];
inbuf, outbuf: string;
WHILE (getline[@inbuf, STDIN, MAXSTR]) DO
j, f: INTEGER;
FOR i: INTEGER IN [1 .. MAXOUT) DO
outbuf[i] ← BLANK ENDLOOP;
f ← index[@inbuf, FOLD];
j ← MIDDLE - 1;
FOR i: INTEGER DECREASING IN (f .. length[@inbuf]) DO
outbuf[j] ← inbuf[i];
j ← j - 1;
IF (j <= 0) THEN
j ← MAXOUT - 1
ENDLOOP;
j ← MIDDLE + 1;
FOR i: INTEGER IN [1 .. f) DO
outbuf[j] ← inbuf[i];
j ← j MOD (MAXOUT-1) + 1
ENDLOOP;
FOR k: INTEGER IN [1 .. MAXOUT) DO
IF (outbuf[k] # BLANK) THEN
j ← k;
ENDLOOP;
outbuf[j+1] ← ENDSTR;
putstr[@outbuf, STDOUT];
putc[NEWLINE]
ENDLOOP};
}.