mirror of
https://github.com/rany2/edge-tts
synced 2024-11-22 01:45:02 +00:00
Update to edge-tts 4.0.0
This commit is contained in:
parent
756766fe6e
commit
4fcecddaf0
3
build.sh
Executable file
3
build.sh
Executable file
@ -0,0 +1,3 @@
|
||||
#!/bin/sh
|
||||
set -x
|
||||
exec python3 setup.py sdist bdist_wheel
|
@ -1,6 +1,12 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
rm -rf build dist src/*.egg-info
|
||||
python3 setup.py sdist bdist_wheel
|
||||
twine upload dist/*
|
||||
rm -rf build dist src/*.egg-info
|
||||
|
||||
set -ex
|
||||
|
||||
./clean.sh
|
||||
|
||||
./build.sh
|
||||
./publish.sh
|
||||
|
||||
./clean.sh
|
||||
|
||||
exit 0
|
||||
|
3
clean.sh
Executable file
3
clean.sh
Executable file
@ -0,0 +1,3 @@
|
||||
#!/bin/sh
|
||||
set -x
|
||||
exec rm -rf build dist src/*.egg-info
|
@ -1,21 +1,27 @@
|
||||
#!/usr/bin/env python3
|
||||
# Example Python script that shows how to use edge-tts as a module
|
||||
"""
|
||||
Example Python script that shows how to use edge-tts as a module
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import tempfile
|
||||
|
||||
from playsound import playsound
|
||||
|
||||
import edgeTTS
|
||||
import edge_tts
|
||||
|
||||
|
||||
async def main():
    """
    Main function

    Prompts for a line of text, streams the synthesized audio for it into a
    temporary file and plays that file with playsound.
    """
    communicate = edge_tts.Communicate()
    ask = input("What do you want TTS to say? ")
    with tempfile.NamedTemporaryFile() as temporary_file:
        async for i in communicate.run(ask):
            # Only entries carrying a payload in slot 2 are written out;
            # the other entries are ignored by this example.
            if i[2] is not None:
                temporary_file.write(i[2])
        # playsound reopens the file by name, so anything still sitting in
        # this handle's write buffer must be flushed first or the end of
        # the audio may be missing.
        temporary_file.flush()
        playsound(temporary_file.name)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
3
publish.sh
Executable file
3
publish.sh
Executable file
@ -0,0 +1,3 @@
|
||||
#!/bin/sh
|
||||
set -x
|
||||
exec twine upload dist/*
|
@ -1,6 +1,6 @@
|
||||
[metadata]
|
||||
name = edge-tts
|
||||
version = 3.0.2
|
||||
version = 4.0.0
|
||||
author = rany
|
||||
author_email = ranygh@riseup.net
|
||||
description = Microsoft Edge's TTS
|
||||
@ -27,5 +27,5 @@ where=src
|
||||
|
||||
[options.entry_points]
|
||||
console_scripts =
|
||||
edge-tts = edgeTTS.__main__:main
|
||||
edge-playback = edgePlayback.__init__:main
|
||||
edge-tts = edge_tts.__main__:main
|
||||
edge-playback = edge_playback.__init__:main
|
||||
|
@ -1,6 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from edgePlayback.__init__ import *
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
26
src/edgePlayback/__init__.py → src/edge_playback/__init__.py
Executable file → Normal file
26
src/edgePlayback/__init__.py → src/edge_playback/__init__.py
Executable file → Normal file
@ -1,5 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Playback TTS with subtitles using edge-tts and mpv.
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
@ -7,13 +11,16 @@ from shutil import which
|
||||
|
||||
|
||||
def main():
|
||||
"""
|
||||
Main function.
|
||||
"""
|
||||
if which("mpv") and which("edge-tts"):
|
||||
with tempfile.NamedTemporaryFile() as media:
|
||||
with tempfile.NamedTemporaryFile() as subtitle:
|
||||
print()
|
||||
print("Media file %s" % media.name)
|
||||
print("Subtitle file %s\n" % subtitle.name)
|
||||
p = subprocess.Popen(
|
||||
print(f"Media file {media.name}")
|
||||
print(f"Subtitle file {subtitle.name}\n")
|
||||
with subprocess.Popen(
|
||||
[
|
||||
"edge-tts",
|
||||
"-w",
|
||||
@ -23,17 +30,18 @@ def main():
|
||||
subtitle.name,
|
||||
]
|
||||
+ sys.argv[1:]
|
||||
)
|
||||
p.communicate()
|
||||
p = subprocess.Popen(
|
||||
) as process:
|
||||
process.communicate()
|
||||
|
||||
with subprocess.Popen(
|
||||
[
|
||||
"mpv",
|
||||
"--keep-open=yes",
|
||||
"--sub-file=" + subtitle.name,
|
||||
f"--sub-file={subtitle.name}",
|
||||
media.name,
|
||||
]
|
||||
)
|
||||
p.communicate()
|
||||
) as process:
|
||||
process.communicate()
|
||||
else:
|
||||
print("This script requires mpv and edge-tts.")
|
||||
|
10
src/edge_playback/__main__.py
Normal file
10
src/edge_playback/__main__.py
Normal file
@ -0,0 +1,10 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
This is the main file for the edge_playback package.
|
||||
"""
|
||||
|
||||
from edge_playback.__init__ import main
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -1,3 +1,7 @@
|
||||
"""
|
||||
__init__ for edge_tts
|
||||
"""
|
||||
|
||||
from .communicate import Communicate
|
||||
from .list_voices import list_voices
|
||||
from .submaker import SubMaker
|
@ -1,3 +1,7 @@
|
||||
"""
|
||||
__main__ for edge_tts.
|
||||
"""
|
||||
|
||||
from .util import main
|
||||
|
||||
if __name__ == "__main__":
|
@ -53,17 +53,14 @@ def remove_incompatible_characters(string):
|
||||
if isinstance(string, bytes):
|
||||
string = string.decode("utf-8")
|
||||
|
||||
cleaned_string = ""
|
||||
for character in string:
|
||||
character_code = ord(character)
|
||||
if (
|
||||
(0 <= character_code <= 8)
|
||||
or (11 <= character_code <= 12)
|
||||
or (14 <= character_code <= 31)
|
||||
):
|
||||
character = " "
|
||||
cleaned_string += character
|
||||
return cleaned_string
|
||||
string = list(string)
|
||||
|
||||
for idx in range(len(string)): # pylint: disable=consider-using-enumerate
|
||||
code = ord(string[idx])
|
||||
if (0 <= code <= 8) or (11 <= code <= 12) or (14 <= code <= 31):
|
||||
string[idx] = " "
|
||||
|
||||
return "".join(string)
|
||||
|
||||
|
||||
def connect_id():
|
||||
@ -144,7 +141,8 @@ def mkssml(text, voice, pitch, rate, volume):
|
||||
|
||||
ssml = (
|
||||
"<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>"
|
||||
f"<voice name='{voice}'><prosody pitch='{pitch}' rate='{rate}' volume='{volume}'>{text}</prosody></voice></speak>"
|
||||
f"<voice name='{voice}'><prosody pitch='{pitch}' rate='{rate}' volume='{volume}'>"
|
||||
f"{text}</prosody></voice></speak>"
|
||||
)
|
||||
return ssml
|
||||
|
||||
@ -192,7 +190,7 @@ def ssml_headers_plus_data(request_id, timestamp, ssml):
|
||||
)
|
||||
|
||||
|
||||
class Communicate:
|
||||
class Communicate: # pylint: disable=too-few-public-methods
|
||||
"""
|
||||
Class for communicating with the service.
|
||||
"""
|
||||
@ -214,7 +212,7 @@ class Communicate:
|
||||
rate="+0%",
|
||||
volume="+0%",
|
||||
customspeak=False,
|
||||
):
|
||||
): # pylint: disable=too-many-arguments, too-many-locals
|
||||
"""
|
||||
Runs the Communicate class.
|
||||
|
||||
@ -266,7 +264,8 @@ class Communicate:
|
||||
"Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
|
||||
"Accept-Encoding": "gzip, deflate, br",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
||||
" (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
|
||||
},
|
||||
) as websocket:
|
||||
for message in messages:
|
||||
@ -275,18 +274,22 @@ class Communicate:
|
||||
|
||||
# Prepare the request to be sent to the service.
|
||||
#
|
||||
# Note that sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed
|
||||
# to be booleans, but Edge Browser seems to send them as strings and not booleans.
|
||||
# This is a bug in Edge Browser as Azure Cognitive Services actually sends them as
|
||||
# booleans and not strings. For now I will send them as booleans unless it causes
|
||||
# Note sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed
|
||||
# to be booleans, but Edge Browser seems to send them as strings.
|
||||
#
|
||||
# This is a bug in Edge as Azure Cognitive Services actually sends them as
|
||||
# bool and not string. For now I will send them as bool unless it causes
|
||||
# any problems.
|
||||
#
|
||||
# Also pay close attention to double { } in request (escape for Python .format()).
|
||||
# Also pay close attention to double { } in request (escape for f-string).
|
||||
request = (
|
||||
f"X-Timestamp:{self.date}\r\n"
|
||||
"Content-Type:application/json; charset=utf-8\r\n"
|
||||
"Path:speech.config\r\n\r\n"
|
||||
f'{{"context":{{"synthesis":{{"audio":{{"metadataoptions":{{"sentenceBoundaryEnabled":{sentence_boundary},"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"}}}}}}}}\r\n'
|
||||
'{"context":{"synthesis":{"audio":{"metadataoptions":{'
|
||||
f'"sentenceBoundaryEnabled":{sentence_boundary},'
|
||||
f'"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"'
|
||||
"}}}}\r\n"
|
||||
)
|
||||
# Send the request to the service.
|
||||
await websocket.send_str(request)
|
@ -15,9 +15,7 @@ async def list_voices():
|
||||
|
||||
This pulls data from the URL used by Microsoft Edge to return a list of
|
||||
all available voices. However many more experimental voices are available
|
||||
than are listed here.
|
||||
(See
|
||||
https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)
|
||||
than are listed here. (See https://aka.ms/csspeech/voicenames)
|
||||
|
||||
Returns:
|
||||
dict: A dictionary of voice attributes.
|
||||
@ -29,7 +27,8 @@ async def list_voices():
|
||||
"Authority": "speech.platform.bing.com",
|
||||
"Sec-CH-UA": '" Not;A Brand";v="99", "Microsoft Edge";v="91", "Chromium";v="91"',
|
||||
"Sec-CH-UA-Mobile": "?0",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
|
||||
"Accept": "*/*",
|
||||
"Sec-Fetch-Site": "none",
|
||||
"Sec-Fetch-Mode": "cors",
|
@ -1,29 +1,68 @@
|
||||
"""
|
||||
SubMaker package for the Edge TTS project.
|
||||
|
||||
SubMaker is a package that makes the process of creating subtitles with
|
||||
information provided by the service easier.
|
||||
"""
|
||||
|
||||
import math
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
|
||||
def formatter(offset1, offset2, subdata):
    """
    formatter returns the timecode and the text of the subtitle.

    Args:
        offset1: Start offset of the cue, converted with mktimestamp.
        offset2: End offset of the cue, converted with mktimestamp.
        subdata (str): Text of the cue; it is XML-escaped before output.

    Returns:
        str: A "<start> --> <end>" line, then the escaped text, each ended
        with CRLF, followed by one empty CRLF line.
    """
    return (
        f"{mktimestamp(offset1)} --> {mktimestamp(offset2)}\r\n"
        f"{escape(subdata)}\r\n\r\n"
    )
|
||||
|
||||
|
||||
def mktimestamp(time_unit):
    """
    mktimestamp returns the timecode of the subtitle.

    The timecode is in the format of 00:00:00.000.

    Args:
        time_unit (int or float): Offset where 10 ** 7 units make up one
            second.

    Returns:
        str: The timecode of the subtitle.
    """
    # Round to whole milliseconds *before* splitting into fields. Formatting
    # the fractional seconds directly with "06.3f" rounds values such as
    # 59.9996 up to "60.000" without carrying into the minutes field.
    total_ms = round(time_unit / 10 ** 4)
    hour, remainder = divmod(total_ms, 3600 * 1000)
    minute, remainder = divmod(remainder, 60 * 1000)
    second, millisecond = divmod(remainder, 1000)
    return f"{hour:02d}:{minute:02d}:{second:02d}.{millisecond:03d}"
|
||||
|
||||
|
||||
class SubMaker:
|
||||
"""
|
||||
SubMaker class
|
||||
"""
|
||||
|
||||
def __init__(self, overlapping=5):
|
||||
"""
|
||||
SubMaker constructor.
|
||||
|
||||
Args:
|
||||
overlapping (int): The amount of time in seconds that the
|
||||
subtitles should overlap.
|
||||
"""
|
||||
self.subs_and_offset = []
|
||||
self.broken_offset = []
|
||||
self.overlapping = overlapping * (10 ** 7)
|
||||
|
||||
def create_sub(self, timestamp, text):
|
||||
"""
|
||||
create_sub creates a subtitle with the given timestamp and text
|
||||
and adds it to the list of subtitles
|
||||
|
||||
Args:
|
||||
timestamp (int): The timestamp of the subtitle.
|
||||
text (str): The text of the subtitle.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
if len(self.subs_and_offset) >= 2:
|
||||
if self.subs_and_offset[-2] >= timestamp + sum(self.broken_offset):
|
||||
self.broken_offset.append(self.subs_and_offset[-2])
|
||||
@ -33,6 +72,12 @@ class SubMaker:
|
||||
self.subs_and_offset.append(text)
|
||||
|
||||
def generate_subs(self):
|
||||
"""
|
||||
generate_subs generates the complete subtitle file.
|
||||
|
||||
Returns:
|
||||
str: The complete subtitle file.
|
||||
"""
|
||||
if len(self.subs_and_offset) >= 2:
|
||||
data = "WEBVTT\r\n\r\n"
|
||||
old_time_stamp = None
|
@ -7,7 +7,54 @@ import argparse
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
from edgeTTS import Communicate, SubMaker, list_voices
|
||||
from edge_tts import Communicate, SubMaker, list_voices
|
||||
|
||||
|
||||
async def _list_voices():
    """
    List available voices.

    Awaits list_voices() and prints each attribute of every returned voice
    as "key: value", one per line, with an empty line between voices. The
    SuggestedCodec, FriendlyName and Status attributes are left out.
    """
    hidden_keys = ("SuggestedCodec", "FriendlyName", "Status")

    for idx, voice in enumerate(await list_voices()):
        # Blank separator line before every voice except the first.
        if idx != 0:
            print()

        for key, value in voice.items():
            if key in hidden_keys:
                continue
            print(f"{key}: {value}")
|
||||
|
||||
|
||||
async def _tts(args):
    """
    Run the text-to-speech request described by the parsed arguments.

    Media data is written to args.write_media when that is set, otherwise to
    standard output. The generated subtitles are written to
    args.write_subtitles when that is set, otherwise to standard error.

    Args:
        args: Parsed command line arguments. The attributes read here are
            text, enable_sentence_boundary, enable_word_boundary, codec,
            voice, pitch, rate, volume, custom_ssml, overlapping,
            write_media and write_subtitles.
    """
    tts = Communicate()
    subs = SubMaker(args.overlapping)

    media_file = None
    if args.write_media:
        media_file = open(args.write_media, "wb")  # pylint: disable=consider-using-with

    # try/finally so the media file is closed even when the stream raises
    # part-way through.
    try:
        async for i in tts.run(
            args.text,
            args.enable_sentence_boundary,
            args.enable_word_boundary,
            args.codec,
            args.voice,
            args.pitch,
            args.rate,
            args.volume,
            customspeak=args.custom_ssml,
        ):
            if i[2] is not None:
                # Slot 2 carries media data.
                if media_file is None:
                    sys.stdout.buffer.write(i[2])
                else:
                    media_file.write(i[2])
            elif i[0] is not None and i[1] is not None:
                # Slots 0 and 1 are handed to SubMaker as timestamp and text.
                subs.create_sub(i[0], i[1])
    finally:
        if media_file is not None:
            media_file.close()

    if not args.write_subtitles:
        sys.stderr.write(subs.generate_subs())
    else:
        with open(args.write_subtitles, "w", encoding="utf-8") as file:
            file.write(subs.generate_subs())
|
||||
|
||||
|
||||
async def _main():
|
||||
@ -24,19 +71,23 @@ async def _main():
|
||||
parser.add_argument(
|
||||
"-v",
|
||||
"--voice",
|
||||
help="voice for TTS. Default: Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
|
||||
help="voice for TTS. "
|
||||
"Default: Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
|
||||
default="Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-c",
|
||||
"--codec",
|
||||
help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. Another choice is webm-24khz-16bit-mono-opus. For more info check https://bit.ly/2T33h6S",
|
||||
help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. "
|
||||
"Another choice is webm-24khz-16bit-mono-opus. "
|
||||
"For more info check https://bit.ly/2T33h6S",
|
||||
default="audio-24khz-48kbitrate-mono-mp3",
|
||||
)
|
||||
group.add_argument(
|
||||
"-l",
|
||||
"--list-voices",
|
||||
help="lists available voices. Edge's list is incomplete so check https://bit.ly/2SFq1d3",
|
||||
help="lists available voices. "
|
||||
"Edge's list is incomplete so check https://bit.ly/2SFq1d3",
|
||||
action="store_true",
|
||||
)
|
||||
parser.add_argument(
|
||||
@ -85,6 +136,10 @@ async def _main():
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.list_voices:
|
||||
await _list_voices()
|
||||
sys.exit(0)
|
||||
|
||||
if args.text is not None or args.file is not None:
|
||||
if args.file is not None:
|
||||
# we need to use sys.stdin.read() because some devices
|
||||
@ -96,45 +151,8 @@ async def _main():
|
||||
# logger.debug("reading from %s" % args.file)
|
||||
with open(args.file, "r", encoding="utf-8") as file:
|
||||
args.text = file.read()
|
||||
tts = Communicate()
|
||||
subs = SubMaker(args.overlapping)
|
||||
if args.write_media:
|
||||
media_file = open(args.write_media, "wb")
|
||||
async for i in tts.run(
|
||||
args.text,
|
||||
args.enable_sentence_boundary,
|
||||
args.enable_word_boundary,
|
||||
args.codec,
|
||||
args.voice,
|
||||
args.pitch,
|
||||
args.rate,
|
||||
args.volume,
|
||||
customspeak=args.custom_ssml,
|
||||
):
|
||||
if i[2] is not None:
|
||||
if not args.write_media:
|
||||
sys.stdout.buffer.write(i[2])
|
||||
else:
|
||||
media_file.write(i[2])
|
||||
elif i[0] is not None and i[1] is not None:
|
||||
subs.create_sub(i[0], i[1])
|
||||
if args.write_media:
|
||||
media_file.close()
|
||||
if not args.write_subtitles:
|
||||
sys.stderr.write(subs.generate_subs())
|
||||
else:
|
||||
with open(args.write_subtitles, "w", encoding="utf-8") as file:
|
||||
file.write(subs.generate_subs())
|
||||
elif args.list_voices:
|
||||
for idx, voice in enumerate(await list_voices()):
|
||||
if idx != 0:
|
||||
print()
|
||||
|
||||
for key in voice.keys():
|
||||
if key in ["SuggestedCodec", "FriendlyName", "Status"]:
|
||||
continue
|
||||
# print ("%s: %s" % ("Name" if key == "ShortName" else key, voice[key]))
|
||||
print(f"{key}: {voice[key]}")
|
||||
await _tts(args)
|
||||
|
||||
|
||||
def main():
|
Loading…
Reference in New Issue
Block a user