-- TexHyph.mesa

-- last written by Doug Wyatt, November 10, 1979 5:51 PM

DIRECTORY
TexDefs: FROM "TexDefs" USING[FChar],
TexHyphDefs: FROM "TexHyphDefs",
TexJustifyDefs: FROM "TexJustifyDefs",
TexMemDefs: FROM "TexMemDefs",
TexNodeDefs: FROM "TexNodeDefs" USING[MakeDiscNode, NodePtr],
TexStringDefs: FROM "TexStringDefs" USING[AppendChar, AppendString],
InlineDefs: FROM "InlineDefs" USING[BITAND, BITOR, BITSHIFT];

TexHyph: PROGRAM
IMPORTS TexHyphDefs,TexMemDefs,TexNodeDefs,TexStringDefs,InlineDefs
EXPORTS TexHyphDefs,TexJustifyDefs =
BEGIN OPEN TexHyphDefs;

XTable: POINTER TO ARRAY XIndex OF STRING ← NIL;
-- ordered hash table
XHyph: POINTER TO ARRAY XIndex OF HyphBits ← NIL;
-- companion hyphen position table

XInit: PROCEDURE =
BEGIN j: XIndex;
FOR j IN [0..XSize) DO XTable[j]←NIL; XHyph[j]←Zeros ENDLOOP;
END;


XLookup: PROCEDURE [s:STRING] RETURNS [intable:BOOLEAN, positions:HyphBits] =
BEGIN h: XIndex; t: STRING; comp: Comparison;
t←TexMemDefs.AllocString[s.length+1];
TexStringDefs.AppendString[t,s];
IF t.length<7 THEN TexStringDefs.AppendChar[t,’ ];
DO h←XHash[t];
WHILE (comp←XCompare[XTable[h],t])=greater DO
h←h-1;
IF h=0 THEN h←XSize-1;
ENDLOOP;
IF comp=equal THEN BEGIN intable←TRUE; positions←XHyph[h]; EXIT; END;
IF s.length=7 THEN TexStringDefs.AppendChar[t,’ ];
IF t[t.length-2]#’s THEN BEGIN intable←FALSE; positions←Zeros; EXIT; END;
t[t.length-2]←’ ;
t.length←t.length-1;
ENDLOOP;
TexMemDefs.FreeString[t];
END;

XEnt: PUBLIC PROCEDURE [s: STRING] =
BEGIN OPEN InlineDefs;
i,n: CARDINAL←0; c: CHARACTER; v,w: HyphBits←Zeros; t,j: STRING;
starseen: BOOLEAN←FALSE; comp: Comparison; h: XIndex;
t←TexMemDefs.AllocString[s.length+1];
FOR i IN [0..s.length) DO
c←s[i];
IF c=’- THEN w←BITOR[w,1]
ELSE IF c=’* THEN starseen←TRUE
ELSE BEGIN
n←n+1; w←BITSHIFT[w,1];
IF n<=7 OR starseen THEN TexStringDefs.AppendChar[t,c];
starseen←FALSE;
END;
ENDLOOP;
IF n>16 THEN
BEGIN
END;
w←BITSHIFT[w,16-n];
IF n<7 THEN TexStringDefs.AppendChar[t,’ ];
h←XHash[t];
WHILE t#NIL DO
WHILE (comp←XCompare[XTable[h],t])=greater DO h←h-1 ENDLOOP;
IF h=0 THEN h←XSize-1
ELSE IF comp=equal THEN
BEGIN
ERROR;
END
ELSE BEGIN
j←XTable[h]; v←XHyph[h];
XTable[h]←t; XHyph[h]←w;
t←j; w←v;
END;
ENDLOOP;
END;

XHash: PROCEDURE [s: STRING] RETURNS [XIndex] =
BEGIN i,acc: CARDINAL←0;
FOR i IN [0..MIN[6,s.length]) DO acc←7*acc+LOOPHOLE[s[i],INTEGER]; ENDLOOP;
RETURN[XSize-(acc MOD XSize)-1];
END;

XCompare: PROCEDURE [s,t: STRING] RETURNS [Comparison] =
BEGIN i: CARDINAL;
IF s=NIL THEN RETURN[less];
-- not necessary to test t.length since t cannot be NIL
FOR i IN [0..MIN[s.length, t.length])

DO -- assume only lower-case letters
IF s[i] < t[i] THEN RETURN[less];
IF s[i] > t[i] THEN RETURN[greater];
ENDLOOP;
RETURN [equal];
END;
-- Data structures implementing sets of letters;

charSetTable: PUBLIC POINTER TO CharSetTable ← NIL;

InSet: PROCEDURE [c: CHARACTER, set: CharSetIndex] RETURNS [BOOLEAN] =
BEGIN OPEN InlineDefs;
IF o[c]<o[’p] THEN
RETURN[LOOPHOLE[BITSHIFT[charSetTable[set][0],o[c]],INTEGER]<0]
ELSE
RETURN[LOOPHOLE[BITSHIFT[charSetTable[set][1],o[c]-o[’p]],INTEGER]<0];
END;

o
: PROCEDURE [c:CHARACTER] RETURNS [FiveBitChar] =
INLINE BEGIN
RETURN[InlineDefs.BITAND[LOOPHOLE[c,WORD],37B]];
END;


-- The suffix table;

suffixTable: PUBLIC POINTER TO SuffixTable←NIL;

SufGet: PROCEDURE [i: SufIndex] RETURNS [SufEntry] =
INLINE BEGIN
RETURN[suffixTable[i]];
END;

-- The prefix table;

prefixTable: PUBLIC POINTER TO PrefixTable←NIL;

PrefGet: PROCEDURE [i: PrefIndex] RETURNS [PrefEntry] =
INLINE BEGIN
RETURN[prefixTable[i]];
END;

-- The consonant pairs table

cpTable: PUBLIC POINTER TO CPTable←NIL;

CPGet: PROCEDURE [i: CPIndex] RETURNS [CPEntry] =
INLINE BEGIN
RETURN[cpTable[i]];
END;

Hyphenate: PUBLIC PROCEDURE
[p: TexNodeDefs.NodePtr, n: CARDINAL, dhyphen: TexDefs.FChar] =
BEGIN i,j: CARDINAL; finale: CARDINAL←177777B; q: TexNodeDefs.NodePtr;
exception,firsttime: BOOLEAN; hyphens: HyphBits; b,c,d,hyphchar: CHARACTER;
sufpc: SufIndex; prefpc: PrefIndex; sufcode: SufInstr; prefcode: PrefInstr;
t: PrefSufArgType; falsex: CARDINAL; se: SufEntry; pe: PrefEntry;

--Phase 0: Construct input-hyphen position word
n←MIN[n,MaxWL];
s[0]←s[n+1]←hyphchar←0C; --must be zero, so it looks like a vowel!
q←p; i←0;
UNTIL i=n DO
WITH qq:q SELECT FROM
kern => NULL;
char =>BEGIN s[i+1]←wc[i]←qq.c.char; i←i+1; END;
ENDCASE => ERROR;
q←q.link;
ENDLOOP;
s.length←n+2; wc.length←n;

--Phase 1: Search exception dictionary
[exception,hyphens]←XLookup[wc];
IF exception THEN
BEGIN
i←2;
WHILE hyphens#0 DO
IF hyphens>77777B THEN s[i]←hyphchar;
hyphens←InlineDefs.BITSHIFT[hyphens,1]; i←i+1;
ENDLOOP;
--DEBUG WriteString["From exception table: "];
END
ELSE BEGIN -- apply rules

--Phase 2: Remove suffixes
i←n-1; firsttime←TRUE;
WHILE i>=3 DO
IF s[i+1]=’e THEN finale←i+1
ELSE IF finale=77776B THEN finale←i+2 ELSE finale←77777B;
sufpc←o[s[i+1]];
DO
[sufcode,t,sufpc,falsex]←se←SufGet[sufpc];
SELECT sufcode FROM
scan =>IF s[i]#t.c THEN sufpc←falsex ELSE i←i-1;
double =>IF s[i]#s[i-1] THEN sufpc←falsex ELSE i←i-1;
table =>IF ~InSet[s[i],t.s] THEN sufpc←falsex ELSE i←i-1;
again =>IF firsttime THEN BEGIN i←n-2; EXIT; END;
check =>IF i<=3 THEN sufpc←falsex;
mark =>IF t.i#0 OR firsttime THEN s[i+t.i+1]←hyphchar;
ENDCASE => EXIT;
ENDLOOP;
SELECT sufcode FROM
success =>BEGIN s[i+t.i+1]←hyphchar; EXIT; END;
fail =>EXIT;
repeat =>BEGIN i←i+t.i-1; s[i+2]←hyphchar; END;
efail =>IF s[n]=’d AND s[n-1]=’e THEN
BEGIN i←n-3; finale←77776B; s[i+2]←hyphchar; END
ELSE EXIT;
ENDCASE;
firsttime←FALSE;
ENDLOOP;

--Phase 3: Remove prefixes
prefpc←o[s[1]]; i←2; j←i-1;
DO
[prefcode,t,prefpc,falsex]←pe←PrefGet[prefpc];
SELECT prefcode FROM
scan =>IF s[i]#t.c THEN prefpc←falsex ELSE i←i+1;
repeat =>IF s[i←i-t.i+1]=hyphchar THEN EXIT ELSE
BEGIN prefpc←o[s[i-1]]; s[i-1]←hyphchar; j←i-1; END;
ENDCASE =>EXIT;
ENDLOOP;
SELECT prefcode FROM
mark =>IF t.i#0 THEN s[i-t.i]←hyphchar;
table =>IF ~InSet[s[i],t.s] THEN c←s[i←j] ELSE prefcode←mark;
vow =>c←s[i←i-1];
cons =>c←s[i];
ENDCASE;

IF prefcode=mark THEN
IF s[i+1]#hyphchar THEN BEGIN c←s[i]; s[i]←hyphchar END ELSE prefcode←fail;

IF prefcode#repeat AND prefcode#fail THEN
--Phase 4: Apply consonant pairs rules
DO -- c=s[i] at the top of the loop
WHILE ~InSet [c,aeiouy0] DO c←s[i←i+1] ENDLOOP;
WHILE InSet[s[i],aeiouy] DO i←i+1 ENDLOOP;
IF (b←s[i])=hyphchar THEN EXIT;
i←i+1; c←s[i];
IF c=’h AND (j←CPGet[o[b]].hchar)#0 THEN
BEGIN b←b+j-2; c←s[i←i+1]; END;
SELECT TRUE FROM
b=’q AND c=’u => s[i-1]←hyphchar;
b=’c AND c=’k => BEGIN c←s[i←i+1]; s[i]←hyphchar; END;
InSet[c,aeiouy0] => LOOP;
b=c =>
SELECT TRUE FROM
c#’l AND c#’s => s[i]←hyphchar;
s[i+1]=’e AND s[i+2]=’r AND s[i+3]=hyphchar => EXIT;
InSet[s[i+1],aeiouy] => s[i]←hyphchar;
ENDCASE => LOOP;
(IF s[(j←i)+1]=’h AND (d←c+CPGet[o[c]].hchar-2)#c-2
THEN c←s[i←i+1] ELSE d←c)=hyphchar => EXIT;
~InSet[s[i+1],aeiouy] => LOOP; -- three consonants in a row
~InSet[d,CPGet[o[b]].set] => s[j]←hyphchar;
~InSet[d,CPGet[CPGet[o[b]].weak+26].set] => LOOP;
s[i+1]=’a AND s[i+2]=’g AND finale=i+3 => EXIT;
s[i+1]=’e AND s[i+2]=’s AND s[i+3]=’t AND s[i+4]=hyphchar => EXIT;
s[i+1]=’e AND s[i+2]=’r AND s[i+3]=hyphchar => EXIT;
ENDCASE => s[i]←hyphchar;
ENDLOOP;
END; -- apply rules

--Phase 5: Insert hyphens (but don’t isolate one char, or two at end)
q←p.link;
FOR i IN [3..n-2] DO
WITH qq:q SELECT FROM
kern => q←q.link;
ENDCASE;
IF finale ~IN [i..i+2] AND s[i]=hyphchar
THEN BEGIN t: TexNodeDefs.NodePtr←TexNodeDefs.MakeDiscNode[dhyphen];
t.link←q.link; q←q.link←t;
END;
q←q.link;
ENDLOOP;
END; -- of Hyphenate

s: STRING ← NIL;
wc: STRING ← NIL;

HyphInit: PROCEDURE =
BEGIN OPEN TexMemDefs;
XTable ← AllocMem[XSize];
XHyph ← AllocMem[XSize];
charSetTable ← AllocMem[SIZE[CharSetTable]];
suffixTable ← AllocMem[SIZE[SuffixTable]];
prefixTable ← AllocMem[SIZE[PrefixTable]];
cpTable ← AllocMem[SIZE[CPTable]];
s ← AllocString[MaxWL+2];
wc ← AllocString[MaxWL];
XInit;
START TexHyphInit;
END;

HyphInit;

END.