-- SynthesizerImpl.mesa
-- Copyright © 1986, 1987 by Xerox Corporation.  All rights reserved.
-- Polle Zellweger (PTZ) July 28, 1986 11:15:41 pm PDT
-- Swinehart, April 4, 1987 3:24:47 pm PST

DIRECTORY
  IO,
  Rope USING [ Concat, Equal, Fetch, Length, ROPE, Substr ],
  RuntimeError USING [ BoundsFault ],
  Synthesizer,
  VoiceUtils USING [ Report ]
  ;

SynthesizerImpl: CEDAR PROGRAM
  IMPORTS Rope, RuntimeError, VoiceUtils
  EXPORTS Synthesizer= {

-- This module is currently set up for the DECtalk.  July 17, 1986 PTZ

BreakText: PUBLIC PROC [text: Rope.ROPE, maxlen: INT] RETURNS [packet, remainder: Rope.ROPE] = {
  -- Splits text into a leading packet of at most maxlen characters and the remainder.
  -- The split point is chosen by FindTextBreak.  If it finds none, the text is cut at exactly
  -- maxlen and a warning is reported.
  -- If the split falls inside a phoneme command, the packet is closed with ESC \ and the
  -- remainder is reopened with ESC P ; z; those extra characters are NOT counted against maxlen.
  index: INT;
  insidePhonemes: BOOL;
  -- A text that already fits (including one of exactly maxlen characters, or an empty one)
  -- is returned whole: there is nothing to break.
  IF text.Length <= maxlen THEN RETURN[packet: text, remainder: ""];
  [index, insidePhonemes] _ FindTextBreak[text, maxlen];
  IF index=0 THEN {
    index _ maxlen;
    VoiceUtils.Report[Rope.Concat["Warning: No good place to break following string.\n", text.Substr[len: index]], $Finch];
    };
  packet _ text.Substr[len: index];
  remainder _ text.Substr[start: index];
  IF insidePhonemes THEN { -- close the current phoneme command, break, and reopen it
    packet _ Rope.Concat[packet, "\033\\"];
    remainder _ Rope.Concat["\033P;z", remainder];
    };
  };

FindTextBreak: PROC [text: Rope.ROPE, maxlen: INT] RETURNS [breakIndex: INT, indexInsidePhonemes: BOOL] = {
  -- Scans the first maxlen characters of text for the best place to split it.
  -- primary/secondaryBreak will be the first character of the next packet.
  -- Returns breakIndex = 0 if there is no good place to break the string.
  -- indexInsidePhonemes tells whether the chosen break lies inside a phoneme command.
  -- Preference order: primaryBreak (whitespace after punctuation or before an opening
  -- bracket/quote), then cmdBoundary (edge of a DECtalk command), then secondaryBreak (any whitespace).
  cmdBoundary, primaryBreak, secondaryBreak: INT _ 0;
  insidePhonemes, primInsidePhonemes, secInsidePhonemes: BOOL _ FALSE;
  prevc: CHAR _ IO.SP; -- i-2
  c: CHAR _ text.Fetch[0]; -- i-1
  FOR i: INT IN [1..maxlen] DO
    nextc: CHAR _ ProtectFetch[text, i]; -- i
    SELECT c FROM
      IO.ESC => {
        SELECT nextc FROM
          'P => { -- c is the first character of a command; may be a phoneme command
            cmdBoundary _ i-1;
            IF PhonemeCmd[text, i+1] THEN insidePhonemes _ TRUE;
            };
          '\\ => { -- nextc is the end of a DECtalk command
            -- Breaking after the terminator needs i+1 characters; do not propose a break
            -- that would make the packet longer than maxlen.
            IF i+1 <= maxlen THEN cmdBoundary _ i+1;
            insidePhonemes _ FALSE;
            };
          -- '[, 'c, IO.SP => c may be the first character of some other DECtalk command,
          -- but the user is not permitted to send these commands.
          ENDCASE; -- c is some other ESC (what is this silly user doing??)
        };
      IO.SP, IO.TAB, IO.CR, IO.LF => {
        secondaryBreak _ i;
        secInsidePhonemes _ insidePhonemes;
        SELECT prevc FROM
          '. => IF NOT SpecialAbbrev[text, i-4] THEN {primaryBreak _ i; primInsidePhonemes _ insidePhonemes};
          ', , ';, '!, '?, '), '}, '], '", '' => {primaryBreak _ i; primInsidePhonemes _ insidePhonemes};
          ENDCASE => {
            SELECT nextc FROM
              '(, '{, '[, '", '', '` => {primaryBreak _ i; -- the last one is an open quote
                primInsidePhonemes _ insidePhonemes};
              ENDCASE;
            };
        };
      ENDCASE;
    prevc _ c;
    c _ nextc;
    ENDLOOP;
  -- Now choose an appropriate spot to break the text....
  IF primaryBreak > 0 THEN RETURN [primaryBreak, primInsidePhonemes] -- could be inside a dict cmd, oh well
  ELSE IF cmdBoundary > 0 THEN RETURN [cmdBoundary, FALSE]
  ELSE IF secondaryBreak > 0 THEN RETURN [secondaryBreak, secInsidePhonemes]
  ELSE RETURN [0, insidePhonemes] -- this is a weird string: no good place to break
  };

PhonemeCmd: PROC [text: Rope.ROPE, i: INT] RETURNS [BOOL _ FALSE] = {
  -- TRUE iff the characters of text starting at index i are the parameter part of a phoneme command.
  -- A DECtalk phoneme command looks like ESC P 0 ; 0 z phonemic text ESC \
  -- The zeros are optional, but we only recognize the cases of both zeros there or both zeros missing.
  nextw: Rope.ROPE _ ProtectSubstr[text: text, start: i, len: 2];
  IF Rope.Equal[s1: nextw, s2: ";z", case: TRUE] THEN RETURN[TRUE];
  nextw _ ProtectSubstr[text: text, start: i, len: 4];
  IF Rope.Equal[s1: nextw, s2: "0;0z", case: TRUE] THEN RETURN[TRUE];
  };

SpecialAbbrev: PROC [text: Rope.ROPE, i: INT] RETURNS [BOOL _ FALSE] = {
  -- i indexes the first letter of a two-letter word; the caller has already seen that the word is
  -- followed by a period (at i+2) and whitespace (at i+3).
  -- Returns TRUE iff the word is one of the abbreviations below AND the next letter after the period
  -- is a capital, i.e. the abbreviation is a title belonging to the following word, so the text
  -- should not be broken after it.
  -- DECtalk: St. followed by a capital letter = Saint; not followed by a capital letter = Street.
  -- Dr. followed by a capital letter = Doctor; not followed by a capital letter = Drive.
  -- Prose adds: Ft. followed by a capital letter = Fort; not followed by a capital letter = feet.
  -- (The DECtalk distinguishes between these latter two on the basis of capitalization:
  -- Ft. = Fort, ft. = feet.  But what about FT.?  Seems like a bug!)
  prevw: Rope.ROPE _ ProtectSubstr[text: text, start: i, len: 2];
  prevc: CHAR _ ProtectFetch[text, i-1];
  -- If we run off the beginning of the string, there must have been a legal break there.
  SELECT prevc FROM
    IO.SP, IO.TAB, IO.CR, IO.LF => NULL;
    ENDCASE => RETURN[FALSE];
  IF Rope.Equal[s1: prevw, s2: "ft", case: FALSE]
    OR Rope.Equal[s1: prevw, s2: "st", case: FALSE]
    OR Rope.Equal[s1: prevw, s2: "dr", case: FALSE] THEN
    -- Start scanning AFTER the period (i, i+1 = the abbreviation, i+2 = '.').  Starting inside the
    -- abbreviation would judge the case of its own second letter instead of the following word.
    -- Running off the end of the text yields the default 'a, which ends the scan with FALSE.
    FOR j: INT _ i+3, j+1 DO
      scanc: CHAR _ ProtectFetch[text: text, index: j, default: 'a];
      SELECT scanc FROM
        IN ['A..'Z] => RETURN[TRUE];
        IN ['a..'z] => RETURN[FALSE];
        ENDCASE;
      ENDLOOP;
  };

ProtectFetch: PROC [text: Rope.ROPE, index: INT, default: CHAR _ IO.SP] RETURNS [char: CHAR] ~ {
  -- Like Rope.Fetch, but returns default instead of raising BoundsFault for an out-of-range index.
  ENABLE RuntimeError.BoundsFault => {char _ default; CONTINUE};
  char _ text.Fetch[index];
  };

ProtectSubstr: PROC [text: Rope.ROPE, start: INT, len: INT] RETURNS [substr: Rope.ROPE _ ""] ~ {
  -- Like Rope.Substr, but returns the empty rope instead of raising BoundsFault.
  ENABLE RuntimeError.BoundsFault => CONTINUE;
  substr _ text.Substr[start, len];
  };

}.
Polle Zellweger (PTZ) July 17, 1986 6:02:46 pm PDT changes to: SynthesizerImpl, BreakText, FindTextBreak Polle Zellweger (PTZ) July 18, 1986 11:41:44 am PDT changes to: DIRECTORY, BreakText, FindTextBreak, PhonemeCmd, nextc (local of FindTextBreak), nextw (local of PhonemeCmd), prevw (local of SpecialAbbrev), SpecialAbbrev, scanc (local of SpecialAbbrev), SynthesizerImpl Polle Zellweger (PTZ) July 18, 1986 5:17:06 pm PDT changes to: BreakText, FindTextBreak, PhonemeCmd, prevw (local of SpecialAbbrev), prevc (local of SpecialAbbrev), SpecialAbbrev, nextw (local of PhonemeCmd) Polle Zellweger (PTZ) July 28, 1986 8:57:29 pm PDT changes to: nextc (local of FindTextBreak), nextw (local of PhonemeCmd), PhonemeCmd, prevw (local of SpecialAbbrev), prevc (local of SpecialAbbrev), scanc (local of SpecialAbbrev) Polle Zellweger (PTZ) July 28, 1986 9:30:23 pm PDT changes to: FindTextBreak, PhonemeCmd, SpecialAbbrev, ProtectFetch, ProtectSubstr Polle Zellweger (PTZ) July 28, 1986 10:34:46 pm PDT changes to: FindTextBreak ΚΪ˜šœ™IcodešœB™BK™3K™'J˜—šΟk ˜ Jšœ˜Jšœœ!œ ˜:Jšœ œ˜#Jšœ ˜ Jšœ œ ˜J˜J˜—šœ œ˜&Jšœ˜Jšœ˜J˜—K™CK™šΟn œœœ œ œœœ˜`Jšœœœ˜"Kšœœœ˜AJšœ6˜6šœœ˜Kšœ˜Kšœw˜wK˜—Kšœ!˜!Kšœ&˜&šœœΟc:˜TKšœ'˜'Kšœ.˜.K˜—K˜—K˜šž œœ œ œœœœ˜kKšœ™Kšœ+œ˜3Kšœ7œœ˜DKšœœœœŸ˜KšœœŸ˜ šœœœ ˜KšœœŸ˜*šœ˜ šœœ˜ šœ˜šœŸC˜JKšœ˜Kšœœœ˜4K˜—šœ Ÿ(˜1Kšœ˜Kšœ˜K˜—Kšœœœv™ƒKšœŸ8˜A—K˜—šœœœœœœœœ˜ Kšœ˜Kšœ#˜#šœ˜Kšœœœœ9˜cKšœ9Ÿœ%˜_šœ˜ šœ˜Kšœ.Ÿ œ&˜tKšœ˜—K˜——K˜—Kšœ˜—K˜Kšœ˜—K™4šœ˜Kšœ%Ÿ&˜Q—šœœ˜Kšœœ˜—šœœ˜Kšœ$˜*—š˜KšœŸ2˜L—˜K˜——šž œœ œœœœœ˜EK™FK™bKšœ œ/˜?Kš œ'œœœœ˜AKšœ4˜4Kš œ)œœœœ˜CK˜K˜—šž œœ œœœœœ˜HK™³K™ςKšœ œ/˜?šœœ˜&K™T—šœ˜Kšœœœœœœœœœ˜$Kšœœœ˜—š œ'œœ'œœ'œ˜”šœœ ˜Kšœœ3˜>šœ˜Kšœ œœ˜Kšœ œœ˜Kšœ˜—Kšœ˜——K˜—K˜šž œœ œ œ  œœœœ˜`Kšœ.œ˜>Kšœ˜K˜K˜—šž œœ œ œœœœ ˜`Kšœ ˜,Kšœ!˜!K˜—K˜K˜™2Kšœ Οr)™5—™3Kš œ  6œ œ œ œ ™Ψ—™2Kšœ  +œ œ œ™œ—K™™2Kš œ  œ œ œ œ œ™³—™2Kšœ  E™Q—™3Kšœ   ™—K™—…—¬v