-- SynthesizerImpl.mesa
-- Copyright © 1986, 1987, 1992 by Xerox Corporation.  All rights reserved.
-- Polle Zellweger (PTZ) July 28, 1986 11:15:41 pm PDT
-- Swinehart, April 4, 1987 3:24:47 pm PST

DIRECTORY
  IO,
  Rope USING [ Concat, Equal, Fetch, Length, ROPE, Substr ],
  RuntimeError USING [ BoundsFault ],
  Synthesizer,
  VoiceUtils USING [ Report ]
  ;

SynthesizerImpl: CEDAR PROGRAM
  IMPORTS Rope, RuntimeError, VoiceUtils
  EXPORTS Synthesizer= {

  -- This module is currently set up for the DECtalk.  July 17, 1986 PTZ

  BreakText: PUBLIC PROC [text: Rope.ROPE, maxlen: INT] RETURNS [packet, remainder: Rope.ROPE] = {
    -- Splits text into a leading packet and the remainder that follows it.
    -- A text of at most maxlen characters is returned whole, with an empty remainder.
    -- Otherwise the split point is chosen by FindTextBreak; if it finds none, the text is cut
    -- at maxlen and a warning is reported through VoiceUtils.Report.
    -- If the split falls inside a phoneme command, the packet gets the command terminator
    -- (ESC \) appended and the remainder gets a fresh command opener (ESC P ; z) prepended,
    -- so the returned packet can be two characters longer than the chosen index.
    index: INT;
    insidePhonemes: BOOL;
    -- A text of exactly maxlen characters already fits (break indices up to maxlen are used below),
    -- so it must not be split and must not trigger the warning.
    IF text.Length <= maxlen THEN RETURN[packet: text, remainder: ""];
    [index, insidePhonemes] ¬ FindTextBreak[text, maxlen];
    IF index=0 THEN {
      index ¬ maxlen;
      VoiceUtils.Report[Rope.Concat["Warning: No good place to break following string.\n", text.Substr[len: index]], $Finch];
      };
    packet ¬ text.Substr[len: index];
    remainder ¬ text.Substr[start: index];
    IF insidePhonemes THEN { -- close the current phoneme command, break, and reopen it
      packet ¬ Rope.Concat[packet, "\033\\"];
      remainder ¬ Rope.Concat["\033P;z", remainder];
      };
    };

  FindTextBreak: PROC [text: Rope.ROPE, maxlen: INT] RETURNS [breakIndex: INT, indexInsidePhonemes: BOOL] = {
    -- primary/secondaryBreak will be the first character of the next packet.
    -- Returns breakIndex = 0 if there is no good place to break the string.
    -- indexInsidePhonemes reports whether the chosen index lies inside a phoneme command.
    -- The loop slides a three character window (prevc, c, nextc) over indices i-2, i-1, i and
    -- remembers the LAST candidate of each kind seen in [1..maxlen]:
    --   primaryBreak: after whitespace that follows sentence or clause punctuation, or before an opening bracket or quote;
    --   cmdBoundary: at the start or just past the end of a DECtalk command;
    --   secondaryBreak: after any whitespace.
    cmdBoundary, primaryBreak, secondaryBreak: INT ¬ 0;
    insidePhonemes, primInsidePhonemes, secInsidePhonemes: BOOL ¬ FALSE;
    prevc: CHAR ¬ IO.SP; -- i-2
    c: CHAR ¬ text.Fetch[0]; -- i-1
    FOR i: INT IN [1..maxlen] DO
      nextc: CHAR ¬ ProtectFetch[text, i]; -- i
      SELECT c FROM
        IO.ESC => {
          SELECT nextc FROM
            'P => { -- c is the first character of a command; may be a phoneme command
              cmdBoundary ¬ i-1;
              IF PhonemeCmd[text, i+1] THEN insidePhonemes ¬ TRUE;
              };
            '\\ => { -- nextc is the end of a DECtalk command
              -- NOTE(review): when i = maxlen this records maxlen+1, so the packet can be one
              -- character longer than maxlen. Confirm whether maxlen is a hard limit.
              cmdBoundary ¬ i+1;
              insidePhonemes ¬ FALSE;
              };
            -- '[, 'c, IO.SP => c may be the first character of some other DECtalk command,
            -- but the user is not permitted to send these commands.
            ENDCASE; -- c is some other ESC (what is this silly user doing??)
          };
        IO.SP, IO.TAB, IO.CR, IO.LF => {
          secondaryBreak ¬ i;
          secInsidePhonemes ¬ insidePhonemes;
          SELECT prevc FROM
            '. => IF NOT SpecialAbbrev[text, i-4] THEN {primaryBreak ¬ i; primInsidePhonemes ¬ insidePhonemes};
            ', , ';, '!, '?, '), '}, '], '", '' => {primaryBreak ¬ i; primInsidePhonemes ¬ insidePhonemes};
            ENDCASE => {
              SELECT nextc FROM
                '(, '{, '[, '", '', '` => { -- the last one is an open quote
                  primaryBreak ¬ i;
                  primInsidePhonemes ¬ insidePhonemes;
                  };
                ENDCASE;
              };
          };
        ENDCASE;
      prevc ¬ c;
      c ¬ nextc;
      ENDLOOP;
    -- Now choose an appropriate spot to break the text....
    IF primaryBreak > 0 THEN RETURN [primaryBreak, primInsidePhonemes] -- could be inside a dict cmd, oh well
    ELSE IF cmdBoundary > 0 THEN RETURN [cmdBoundary, FALSE]
    ELSE IF secondaryBreak > 0 THEN RETURN [secondaryBreak, secInsidePhonemes]
    ELSE RETURN [0, insidePhonemes] -- this is a weird string: no good place to break
    };

  PhonemeCmd: PROC [text: Rope.ROPE, i: INT] RETURNS [BOOL ¬ FALSE] = {
    -- A DECtalk phoneme command looks like ESC P 0 ; 0 z phonemic text ESC \
    -- The zeros are optional, but we only recognize the cases of both zeros there or both zeros missing.
    -- i is the index of the character following the P.  Returns FALSE (the default) when neither form matches.
    nextw: Rope.ROPE ¬ ProtectSubstr[text: text, start: i, len: 2];
    IF Rope.Equal[s1: nextw, s2: ";z", case: TRUE] THEN RETURN[TRUE];
    nextw ¬ ProtectSubstr[text: text, start: i, len: 4];
    IF Rope.Equal[s1: nextw, s2: "0;0z", case: TRUE] THEN RETURN[TRUE];
    };

  SpecialAbbrev: PROC [text: Rope.ROPE, i: INT] RETURNS [BOOL ¬ FALSE] = {
    -- DECtalk: St. followed by a capital letter = Saint; not followed by a capital letter = Street.
    -- Dr. followed by a capital letter = Doctor; not followed by a capital letter = Drive.
    -- Prose adds: Ft. followed by a capital letter = Fort; not followed by a capital letter = feet.
    -- (The DECtalk distinguishes between these latter two on the basis of capitalization:
    -- Ft. = Fort, ft. = feet.  But what about FT.?  Seems like a bug!)
    --
    -- i is the index of the first letter of a two letter word; the caller has already seen a
    -- period at i+2 and whitespace at i+3.  Returns TRUE when that word is st, dr or ft (any case),
    -- is preceded by whitespace or the start of the text, and the first letter found after the
    -- period is a capital.  Returns FALSE otherwise.
    prevw: Rope.ROPE ¬ ProtectSubstr[text: text, start: i, len: 2];
    -- If we run off the beginning of the string, there must have been a legal break there.
    prevc: CHAR ¬ ProtectFetch[text, i-1];
    SELECT prevc FROM
      IO.SP, IO.TAB, IO.CR, IO.LF => NULL;
      ENDCASE => RETURN[FALSE];
    IF Rope.Equal[s1: prevw, s2: "ft", case: FALSE]
      OR Rope.Equal[s1: prevw, s2: "st", case: FALSE]
      OR Rope.Equal[s1: prevw, s2: "dr", case: FALSE] THEN
      -- Start just past the period.  Starting at i+1 would test the abbreviation's own second
      -- letter instead of the word that follows.  Running off the end of the text yields the
      -- default 'a, which ends the scan with FALSE.
      FOR j: INT ¬ i+3, j+1 DO
        scanc: CHAR ¬ ProtectFetch[text: text, index: j, default: 'a];
        SELECT scanc FROM
          IN ['A..'Z] => RETURN[TRUE];
          IN ['a..'z] => RETURN[FALSE];
          ENDCASE; -- not a letter: keep scanning
        ENDLOOP;
    };

  ProtectFetch: PROC [text: Rope.ROPE, index: INT, default: CHAR ¬ IO.SP] RETURNS [char: CHAR] ~ {
    -- Like text.Fetch[index], but returns default instead of propagating RuntimeError.BoundsFault.
    ENABLE RuntimeError.BoundsFault => {char ¬ default; CONTINUE};
    char ¬ text.Fetch[index];
    };

  ProtectSubstr: PROC [text: Rope.ROPE, start: INT, len: INT] RETURNS [substr: Rope.ROPE ¬ ""] ~ {
    -- Like text.Substr[start, len], but returns the empty rope instead of propagating RuntimeError.BoundsFault.
    ENABLE RuntimeError.BoundsFault => CONTINUE;
    substr ¬ text.Substr[start, len];
    };

  }.
Polle Zellweger (PTZ) July 17, 1986 6:02:46 pm PDT changes to: SynthesizerImpl, BreakText, FindTextBreak Polle Zellweger (PTZ) July 18, 1986 11:41:44 am PDT changes to: DIRECTORY, BreakText, FindTextBreak, PhonemeCmd, nextc (local of FindTextBreak), nextw (local of PhonemeCmd), prevw (local of SpecialAbbrev), SpecialAbbrev, scanc (local of SpecialAbbrev), SynthesizerImpl Polle Zellweger (PTZ) July 18, 1986 5:17:06 pm PDT changes to: BreakText, FindTextBreak, PhonemeCmd, prevw (local of SpecialAbbrev), prevc (local of SpecialAbbrev), SpecialAbbrev, nextw (local of PhonemeCmd) Polle Zellweger (PTZ) July 28, 1986 8:57:29 pm PDT changes to: nextc (local of FindTextBreak), nextw (local of PhonemeCmd), PhonemeCmd, prevw (local of SpecialAbbrev), prevc (local of SpecialAbbrev), scanc (local of SpecialAbbrev) Polle Zellweger (PTZ) July 28, 1986 9:30:23 pm PDT changes to: FindTextBreak, PhonemeCmd, SpecialAbbrev, ProtectFetch, ProtectSubstr Polle Zellweger (PTZ) July 28, 1986 10:34:46 pm PDT changes to: FindTextBreak ΚΟ•NewlineDelimiter –(cedarcode) style™šœ™Icodešœ Οeœ=™HK™3K™'K˜—šΟk ˜ Kšžœ˜Kšœžœ!žœ ˜:Kšœ žœ˜#Kšœ ˜ Kšœ žœ ˜K˜K˜—šœž œž˜&Kšœ˜Kšžœ˜K˜—K™CK™šΟn œžœžœ žœ žœžœžœ˜`Kšœžœžœ˜"Kšžœžœžœ˜AK˜6šžœžœ˜K˜Kšœw˜wK˜—K˜!K˜&šžœžœΟc:˜TK˜'K˜.K˜—K˜—K˜šŸ œžœ žœ žœžœžœžœ˜kKšœ™Kšœ+žœ˜3Kšœ7žœžœ˜DKšœžœžœžœ ˜Kšœžœ ˜ šžœžœžœ ž˜Kšœžœ ˜*šžœž˜ šžœžœ˜ šžœž˜šœ C˜JK˜Kšžœžœžœ˜4K˜—šœ  (˜1K˜Kšœž˜K˜—Kšœžœžœv™ƒKšžœ 8˜A—K˜—šžœžœžœžœžœžœžœžœ˜ K˜K˜#šžœž˜Kšœžœžœžœ9˜cK˜_šžœ˜ šžœž˜Kšœ.  œ&˜tKšžœ˜—K˜——K˜—Kšžœ˜—K˜Kšžœ˜—K™4šžœž˜Kšžœ% &˜Q—šžœžœž˜Kšžœžœ˜—šžœžœž˜Kšžœ$˜*—šž˜Kšžœ 2˜L—˜K˜——šŸ œžœ žœžœžœžœžœ˜EK™FK™bKšœ žœ/˜?Kš žœ'žœžœžœžœ˜AK˜4Kš žœ)žœžœžœžœ˜CK˜K˜—šŸ œžœ žœžœžœžœžœ˜HK™³K™ςKšœ žœ/˜?šœžœ˜&K™T—šžœž˜Kšžœžœžœžœžœžœžœžœžœ˜$Kšžœžœžœ˜—š žœ'žœžœ'žœžœ'žœž˜”šžœžœ ž˜Kšœžœ3˜>šžœž˜Kšžœ žœžœ˜Kšžœ žœžœ˜Kšžœ˜—Kšžœ˜——K˜—K˜šŸ œžœ žœ žœ žœžœžœžœžœ˜`Kšžœ.žœ˜>K˜K˜K˜—šŸ œžœ žœ žœžœžœžœ ˜`Kšžœž ˜,K˜!K˜—K˜K˜™2Kšœ Οr)™5—™3Kš œ ‘6œ‘œ‘œ‘œ‘™Ψ—™2Kšœ ‘+œ‘œ‘œ™œ—K™™2Kš œ ‘œ‘œ‘œ‘œ‘œ™³—™2Kšœ ‘E™Q—™3Kšœ ‘ ™—K™—…—¬q