mirror of
https://github.com/rany2/edge-tts
synced 2024-11-22 01:45:02 +00:00
Merge pull request #138 from scott306lr/master
Adding pitch variable back
This commit is contained in:
commit
9db10f02a0
@ -63,17 +63,16 @@ You must first check the available voices with the `--list-voices` option:
|
|||||||
|
|
||||||
Support for custom SSML was removed in 5.0.0 because Microsoft has taken steps to prevent it from working. You cannot use custom SSML anymore.
|
Support for custom SSML was removed in 5.0.0 because Microsoft has taken steps to prevent it from working. You cannot use custom SSML anymore.
|
||||||
|
|
||||||
### Changing rate and volume
|
### Changing rate, volume and pitch
|
||||||
|
|
||||||
It is possible to make minor changes to the generated speech.
|
It is possible to make minor changes to the generated speech.
|
||||||
|
|
||||||
$ edge-tts --rate=-50% --text "Hello, world!" --write-media hello_with_rate_halved.mp3 --write-subtitles hello_with_rate_halved.vtt
|
$ edge-tts --rate=-50% --text "Hello, world!" --write-media hello_with_rate_halved.mp3 --write-subtitles hello_with_rate_halved.vtt
|
||||||
$ edge-tts --volume=-50% --text "Hello, world!" --write-media hello_with_volume_halved.mp3 --write-subtitles hello_with_volume_halved.vtt
|
$ edge-tts --volume=-50% --text "Hello, world!" --write-media hello_with_volume_halved.mp3 --write-subtitles hello_with_volume_halved.vtt
|
||||||
|
$ edge-tts --pitch=-50Hz --text "Hello, world!" --write-media hello_with_pitch_lowered.mp3 --write-subtitles hello_with_pitch_lowered.vtt
|
||||||
|
|
||||||
In addition, you must write `--rate=-50%` rather than `--rate -50%` (note the missing equal sign in the latter); otherwise, the `-50%` would be interpreted as just another argument.
|
In addition, you must write `--rate=-50%` rather than `--rate -50%` (note the missing equal sign in the latter); otherwise, the `-50%` would be interpreted as just another argument.
|
||||||
|
|
||||||
**NOTE**: `--pitch` was removed in 6.0.3 as it no longer appears to have any effect.
|
|
||||||
|
|
||||||
### Note on the `edge-playback` command
|
### Note on the `edge-playback` command
|
||||||
|
|
||||||
`edge-playback` is just a wrapper around `edge-tts` that plays back the generated speech. It takes the same arguments as the `edge-tts` command.
|
`edge-playback` is just a wrapper around `edge-tts` that plays back the generated speech. It takes the same arguments as the `edge-tts` command.
|
||||||
|
@ -152,7 +152,7 @@ def split_text_by_byte_length(
|
|||||||
yield new_text
|
yield new_text
|
||||||
|
|
||||||
|
|
||||||
def mkssml(text: Union[str, bytes], voice: str, rate: str, volume: str) -> str:
|
def mkssml(text: Union[str, bytes], voice: str, rate: str, volume: str, pitch: str) -> str:
|
||||||
"""
|
"""
|
||||||
Creates a SSML string from the given parameters.
|
Creates a SSML string from the given parameters.
|
||||||
|
|
||||||
@ -164,7 +164,7 @@ def mkssml(text: Union[str, bytes], voice: str, rate: str, volume: str) -> str:
|
|||||||
|
|
||||||
ssml = (
|
ssml = (
|
||||||
"<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>"
|
"<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>"
|
||||||
f"<voice name='{voice}'><prosody pitch='+0Hz' rate='{rate}' volume='{volume}'>"
|
f"<voice name='{voice}'><prosody pitch='{pitch}' rate='{rate}' volume='{volume}'>"
|
||||||
f"{text}</prosody></voice></speak>"
|
f"{text}</prosody></voice></speak>"
|
||||||
)
|
)
|
||||||
return ssml
|
return ssml
|
||||||
@ -203,7 +203,7 @@ def ssml_headers_plus_data(request_id: str, timestamp: str, ssml: str) -> str:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def calc_max_mesg_size(voice: str, rate: str, volume: str) -> int:
|
def calc_max_mesg_size(voice: str, rate: str, volume: str, pitch: str) -> int:
|
||||||
"""Calculates the maximum message size for the given voice, rate, and volume.
|
"""Calculates the maximum message size for the given voice, rate, volume, and pitch.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
@ -215,7 +215,7 @@ def calc_max_mesg_size(voice: str, rate: str, volume: str) -> int:
|
|||||||
ssml_headers_plus_data(
|
ssml_headers_plus_data(
|
||||||
connect_id(),
|
connect_id(),
|
||||||
date_to_string(),
|
date_to_string(),
|
||||||
mkssml("", voice, rate, volume),
|
mkssml("", voice, rate, volume, pitch),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
+ 50 # margin of error
|
+ 50 # margin of error
|
||||||
@ -235,6 +235,7 @@ class Communicate:
|
|||||||
*,
|
*,
|
||||||
rate: str = "+0%",
|
rate: str = "+0%",
|
||||||
volume: str = "+0%",
|
volume: str = "+0%",
|
||||||
|
pitch: str = "+0Hz",
|
||||||
proxy: Optional[str] = None,
|
proxy: Optional[str] = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@ -289,6 +290,12 @@ class Communicate:
|
|||||||
raise ValueError(f"Invalid volume '{volume}'.")
|
raise ValueError(f"Invalid volume '{volume}'.")
|
||||||
self.volume: str = volume
|
self.volume: str = volume
|
||||||
|
|
||||||
|
if not isinstance(pitch, str):
|
||||||
|
raise TypeError("pitch must be str")
|
||||||
|
if re.match(r"^[+-]\d+Hz$", pitch) is None:
|
||||||
|
raise ValueError(f"Invalid pitch '{pitch}'.")
|
||||||
|
self.pitch: str = pitch
|
||||||
|
|
||||||
if proxy is not None and not isinstance(proxy, str):
|
if proxy is not None and not isinstance(proxy, str):
|
||||||
raise TypeError("proxy must be str")
|
raise TypeError("proxy must be str")
|
||||||
self.proxy: Optional[str] = proxy
|
self.proxy: Optional[str] = proxy
|
||||||
@ -298,7 +305,7 @@ class Communicate:
|
|||||||
|
|
||||||
texts = split_text_by_byte_length(
|
texts = split_text_by_byte_length(
|
||||||
escape(remove_incompatible_characters(self.text)),
|
escape(remove_incompatible_characters(self.text)),
|
||||||
calc_max_mesg_size(self.voice, self.rate, self.volume),
|
calc_max_mesg_size(self.voice, self.rate, self.volume, self.pitch),
|
||||||
)
|
)
|
||||||
final_utterance: Dict[int, int] = {}
|
final_utterance: Dict[int, int] = {}
|
||||||
prev_idx = -1
|
prev_idx = -1
|
||||||
@ -362,7 +369,7 @@ class Communicate:
|
|||||||
ssml_headers_plus_data(
|
ssml_headers_plus_data(
|
||||||
connect_id(),
|
connect_id(),
|
||||||
date,
|
date,
|
||||||
mkssml(text, self.voice, self.rate, self.volume),
|
mkssml(text, self.voice, self.rate, self.volume, self.pitch),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -57,6 +57,7 @@ async def _run_tts(args: Any) -> None:
|
|||||||
proxy=args.proxy,
|
proxy=args.proxy,
|
||||||
rate=args.rate,
|
rate=args.rate,
|
||||||
volume=args.volume,
|
volume=args.volume,
|
||||||
|
pitch=args.pitch,
|
||||||
)
|
)
|
||||||
subs: SubMaker = SubMaker()
|
subs: SubMaker = SubMaker()
|
||||||
with open(
|
with open(
|
||||||
@ -97,6 +98,7 @@ async def amain() -> None:
|
|||||||
)
|
)
|
||||||
parser.add_argument("--rate", help="set TTS rate. Default +0%%.", default="+0%")
|
parser.add_argument("--rate", help="set TTS rate. Default +0%%.", default="+0%")
|
||||||
parser.add_argument("--volume", help="set TTS volume. Default +0%%.", default="+0%")
|
parser.add_argument("--volume", help="set TTS volume. Default +0%%.", default="+0%")
|
||||||
|
parser.add_argument("--pitch", help="set TTS pitch. Default +0Hz.", default="+0Hz")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--words-in-cue",
|
"--words-in-cue",
|
||||||
help="number of words in a subtitle cue. Default: 10.",
|
help="number of words in a subtitle cue. Default: 10.",
|
||||||
|
Loading…
Reference in New Issue
Block a user