Merge pull request #138 from scott306lr/master

Adding pitch variable back
This commit is contained in:
rany2 2023-08-31 18:02:04 +03:00 committed by GitHub
commit 9db10f02a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 17 additions and 9 deletions

View File

@ -63,17 +63,16 @@ You must first check the available voices with the `--list-voices` option:
Support for custom SSML was removed in 5.0.0 because Microsoft has taken steps to prevent it from working. You can no longer use custom SSML. Support for custom SSML was removed in 5.0.0 because Microsoft has taken steps to prevent it from working. You can no longer use custom SSML.
### Changing rate and volume ### Changing rate, volume and pitch
It is possible to make minor changes to the generated speech. It is possible to make minor changes to the generated speech.
$ edge-tts --rate=-50% --text "Hello, world!" --write-media hello_with_rate_halved.mp3 --write-subtitles hello_with_rate_halved.vtt $ edge-tts --rate=-50% --text "Hello, world!" --write-media hello_with_rate_halved.mp3 --write-subtitles hello_with_rate_halved.vtt
$ edge-tts --volume=-50% --text "Hello, world!" --write-media hello_with_volume_halved.mp3 --write-subtitles hello_with_volume_halved.vtt $ edge-tts --volume=-50% --text "Hello, world!" --write-media hello_with_volume_halved.mp3 --write-subtitles hello_with_volume_halved.vtt
$ edge-tts --pitch=-50Hz --text "Hello, world!" --write-media hello_with_pitch_halved.mp3 --write-subtitles hello_with_pitch_halved.vtt
In addition, you must write `--rate=-50%` rather than `--rate -50%` (the second form lacks the equal sign); otherwise the `-50%` would be interpreted as a separate argument. In addition, you must write `--rate=-50%` rather than `--rate -50%` (the second form lacks the equal sign); otherwise the `-50%` would be interpreted as a separate argument.
**NOTE**: `--pitch` was removed in 6.0.3 as it no longer appears to have any effect.
### Note on the `edge-playback` command ### Note on the `edge-playback` command
`edge-playback` is just a wrapper around `edge-tts` that plays back the generated speech. It takes the same arguments as the `edge-tts` command. `edge-playback` is just a wrapper around `edge-tts` that plays back the generated speech. It takes the same arguments as the `edge-tts` command.

View File

@ -152,7 +152,7 @@ def split_text_by_byte_length(
yield new_text yield new_text
def mkssml(text: Union[str, bytes], voice: str, rate: str, volume: str) -> str: def mkssml(text: Union[str, bytes], voice: str, rate: str, volume: str, pitch: str) -> str:
""" """
Creates a SSML string from the given parameters. Creates a SSML string from the given parameters.
@ -164,7 +164,7 @@ def mkssml(text: Union[str, bytes], voice: str, rate: str, volume: str) -> str:
ssml = ( ssml = (
"<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>" "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>"
f"<voice name='{voice}'><prosody pitch='+0Hz' rate='{rate}' volume='{volume}'>" f"<voice name='{voice}'><prosody pitch='{pitch}' rate='{rate}' volume='{volume}'>"
f"{text}</prosody></voice></speak>" f"{text}</prosody></voice></speak>"
) )
return ssml return ssml
@ -203,7 +203,7 @@ def ssml_headers_plus_data(request_id: str, timestamp: str, ssml: str) -> str:
) )
def calc_max_mesg_size(voice: str, rate: str, volume: str) -> int: def calc_max_mesg_size(voice: str, rate: str, volume: str, pitch: str) -> int:
"""Calculates the maximum message size for the given voice, rate, and volume. """Calculates the maximum message size for the given voice, rate, and volume.
Returns: Returns:
@ -215,7 +215,7 @@ def calc_max_mesg_size(voice: str, rate: str, volume: str) -> int:
ssml_headers_plus_data( ssml_headers_plus_data(
connect_id(), connect_id(),
date_to_string(), date_to_string(),
mkssml("", voice, rate, volume), mkssml("", voice, rate, volume, pitch),
) )
) )
+ 50 # margin of error + 50 # margin of error
@ -235,6 +235,7 @@ class Communicate:
*, *,
rate: str = "+0%", rate: str = "+0%",
volume: str = "+0%", volume: str = "+0%",
pitch: str = "+0Hz",
proxy: Optional[str] = None, proxy: Optional[str] = None,
): ):
""" """
@ -289,6 +290,12 @@ class Communicate:
raise ValueError(f"Invalid volume '{volume}'.") raise ValueError(f"Invalid volume '{volume}'.")
self.volume: str = volume self.volume: str = volume
if not isinstance(pitch, str):
raise TypeError("pitch must be str")
if re.match(r"^[+-]\d+Hz$", pitch) is None:
raise ValueError(f"Invalid pitch '{pitch}'.")
self.pitch: str = pitch
if proxy is not None and not isinstance(proxy, str): if proxy is not None and not isinstance(proxy, str):
raise TypeError("proxy must be str") raise TypeError("proxy must be str")
self.proxy: Optional[str] = proxy self.proxy: Optional[str] = proxy
@ -298,7 +305,7 @@ class Communicate:
texts = split_text_by_byte_length( texts = split_text_by_byte_length(
escape(remove_incompatible_characters(self.text)), escape(remove_incompatible_characters(self.text)),
calc_max_mesg_size(self.voice, self.rate, self.volume), calc_max_mesg_size(self.voice, self.rate, self.volume, self.pitch),
) )
final_utterance: Dict[int, int] = {} final_utterance: Dict[int, int] = {}
prev_idx = -1 prev_idx = -1
@ -362,7 +369,7 @@ class Communicate:
ssml_headers_plus_data( ssml_headers_plus_data(
connect_id(), connect_id(),
date, date,
mkssml(text, self.voice, self.rate, self.volume), mkssml(text, self.voice, self.rate, self.volume, self.pitch),
) )
) )

View File

@ -57,6 +57,7 @@ async def _run_tts(args: Any) -> None:
proxy=args.proxy, proxy=args.proxy,
rate=args.rate, rate=args.rate,
volume=args.volume, volume=args.volume,
pitch=args.pitch,
) )
subs: SubMaker = SubMaker() subs: SubMaker = SubMaker()
with open( with open(
@ -97,6 +98,7 @@ async def amain() -> None:
) )
parser.add_argument("--rate", help="set TTS rate. Default +0%%.", default="+0%") parser.add_argument("--rate", help="set TTS rate. Default +0%%.", default="+0%")
parser.add_argument("--volume", help="set TTS volume. Default +0%%.", default="+0%") parser.add_argument("--volume", help="set TTS volume. Default +0%%.", default="+0%")
parser.add_argument("--pitch", help="set TTS pitch. Default +0Hz.", default="+0Hz")
parser.add_argument( parser.add_argument(
"--words-in-cue", "--words-in-cue",
help="number of words in a subtitle cue. Default: 10.", help="number of words in a subtitle cue. Default: 10.",