Update to edge-tts 4.0.0

This commit is contained in:
rany 2021-12-07 22:09:43 +02:00
parent 756766fe6e
commit 4fcecddaf0
16 changed files with 207 additions and 101 deletions

3
build.sh Executable file
View File

@ -0,0 +1,3 @@
#!/bin/sh
# Build the source distribution (sdist) and wheel for this package.
# -x: print each command before it runs so the build log shows what happened.
set -x
# exec replaces this shell with the python process, so the script's exit
# status is whatever setup.py returns.
exec python3 setup.py sdist bdist_wheel

View File

@ -1,6 +1,12 @@
#!/bin/sh
set -e
rm -rf build dist src/*.egg-info
python3 setup.py sdist bdist_wheel
twine upload dist/*
rm -rf build dist src/*.egg-info
set -ex
./clean.sh
./build.sh
./publish.sh
./clean.sh
exit 0

3
clean.sh Executable file
View File

@ -0,0 +1,3 @@
#!/bin/sh
# Remove build artifacts: the build/ and dist/ directories and any
# *.egg-info metadata directly under src/.
# -x: print each command before it runs.
set -x
# -f keeps rm quiet (and successful) when a path does not exist;
# exec makes rm's exit status the script's exit status.
exec rm -rf build dist src/*.egg-info

View File

@ -1,21 +1,27 @@
#!/usr/bin/env python3
# Example Python script that shows how to use edge-tts as a module
"""
Example Python script that shows how to use edge-tts as a module
"""
import asyncio
import tempfile
from playsound import playsound
import edgeTTS
import edge_tts
async def main():
communicate = edgeTTS.Communicate()
"""
Main function
"""
communicate = edge_tts.Communicate()
ask = input("What do you want TTS to say? ")
with tempfile.NamedTemporaryFile() as fp:
with tempfile.NamedTemporaryFile() as temporary_file:
async for i in communicate.run(ask):
if i[2] is not None:
fp.write(i[2])
playsound(fp.name)
temporary_file.write(i[2])
playsound(temporary_file.name)
if __name__ == "__main__":

3
publish.sh Executable file
View File

@ -0,0 +1,3 @@
#!/bin/sh
# Upload every file in dist/ using twine (to whichever repository twine is
# configured for).
# -x: print each command before it runs.
set -x
# exec replaces this shell with twine, so the script's exit status is twine's.
exec twine upload dist/*

View File

@ -1,6 +1,6 @@
[metadata]
name = edge-tts
version = 3.0.2
version = 4.0.0
author = rany
author_email = ranygh@riseup.net
description = Microsoft Edge's TTS
@ -27,5 +27,5 @@ where=src
[options.entry_points]
console_scripts =
edge-tts = edgeTTS.__main__:main
edge-playback = edgePlayback.__init__:main
edge-tts = edge_tts.__main__:main
edge-playback = edge_playback.__init__:main

View File

@ -1,6 +0,0 @@
#!/usr/bin/env python3
from edgePlayback.__init__ import *
if __name__ == "__main__":
main()

View File

@ -1,5 +1,9 @@
#!/usr/bin/env python3
"""
Playback TTS with subtitles using edge-tts and mpv.
"""
import subprocess
import sys
import tempfile
@ -7,13 +11,16 @@ from shutil import which
def main():
"""
Main function.
"""
if which("mpv") and which("edge-tts"):
with tempfile.NamedTemporaryFile() as media:
with tempfile.NamedTemporaryFile() as subtitle:
print()
print("Media file %s" % media.name)
print("Subtitle file %s\n" % subtitle.name)
p = subprocess.Popen(
print(f"Media file {media.name}")
print(f"Subtitle file {subtitle.name}\n")
with subprocess.Popen(
[
"edge-tts",
"-w",
@ -23,17 +30,18 @@ def main():
subtitle.name,
]
+ sys.argv[1:]
)
p.communicate()
p = subprocess.Popen(
) as process:
process.communicate()
with subprocess.Popen(
[
"mpv",
"--keep-open=yes",
"--sub-file=" + subtitle.name,
f"--sub-file={subtitle.name}",
media.name,
]
)
p.communicate()
) as process:
process.communicate()
else:
print("This script requires mpv and edge-tts.")

View File

@ -0,0 +1,10 @@
#!/usr/bin/env python3

"""
This is the main file for the edge_playback package.
"""

# Import from the package itself rather than from ``edge_playback.__init__``:
# naming ``__init__`` as a submodule makes Python execute the package's
# __init__.py a second time under a separate module name.
from edge_playback import main

if __name__ == "__main__":
    main()

View File

@ -1,3 +1,7 @@
"""
__init__ for edge_tts
"""
from .communicate import Communicate
from .list_voices import list_voices
from .submaker import SubMaker

View File

@ -1,3 +1,7 @@
"""
__main__ for edge_tts.
"""
from .util import main
if __name__ == "__main__":

View File

@ -53,17 +53,14 @@ def remove_incompatible_characters(string):
if isinstance(string, bytes):
string = string.decode("utf-8")
cleaned_string = ""
for character in string:
character_code = ord(character)
if (
(0 <= character_code <= 8)
or (11 <= character_code <= 12)
or (14 <= character_code <= 31)
):
character = " "
cleaned_string += character
return cleaned_string
string = list(string)
for idx in range(len(string)): # pylint: disable=consider-using-enumerate
code = ord(string[idx])
if (0 <= code <= 8) or (11 <= code <= 12) or (14 <= code <= 31):
string[idx] = " "
return "".join(string)
def connect_id():
@ -144,7 +141,8 @@ def mkssml(text, voice, pitch, rate, volume):
ssml = (
"<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>"
f"<voice name='{voice}'><prosody pitch='{pitch}' rate='{rate}' volume='{volume}'>{text}</prosody></voice></speak>"
f"<voice name='{voice}'><prosody pitch='{pitch}' rate='{rate}' volume='{volume}'>"
f"{text}</prosody></voice></speak>"
)
return ssml
@ -192,7 +190,7 @@ def ssml_headers_plus_data(request_id, timestamp, ssml):
)
class Communicate:
class Communicate: # pylint: disable=too-few-public-methods
"""
Class for communicating with the service.
"""
@ -214,7 +212,7 @@ class Communicate:
rate="+0%",
volume="+0%",
customspeak=False,
):
): # pylint: disable=too-many-arguments, too-many-locals
"""
Runs the Communicate class.
@ -266,7 +264,8 @@ class Communicate:
"Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-US,en;q=0.9",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
" (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
},
) as websocket:
for message in messages:
@ -275,18 +274,22 @@ class Communicate:
# Prepare the request to be sent to the service.
#
# Note that sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed
# to be booleans, but Edge Browser seems to send them as strings and not booleans.
# This is a bug in Edge Browser as Azure Cognitive Services actually sends them as
# booleans and not strings. For now I will send them as booleans unless it causes
# Note sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed
# to be booleans, but Edge Browser seems to send them as strings.
#
# This is a bug in Edge as Azure Cognitive Services actually sends them as
# bool and not string. For now I will send them as bool unless it causes
# any problems.
#
# Also pay close attention to double { } in request (escape for Python .format()).
# Also pay close attention to double { } in request (escape for f-string).
request = (
f"X-Timestamp:{self.date}\r\n"
"Content-Type:application/json; charset=utf-8\r\n"
"Path:speech.config\r\n\r\n"
f'{{"context":{{"synthesis":{{"audio":{{"metadataoptions":{{"sentenceBoundaryEnabled":{sentence_boundary},"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"}}}}}}}}\r\n'
'{"context":{"synthesis":{"audio":{"metadataoptions":{'
f'"sentenceBoundaryEnabled":{sentence_boundary},'
f'"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"'
"}}}}\r\n"
)
# Send the request to the service.
await websocket.send_str(request)

View File

@ -15,9 +15,7 @@ async def list_voices():
This pulls data from the URL used by Microsoft Edge to return a list of
all available voices. However many more experimental voices are available
than are listed here.
(See
https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)
than are listed here. (See https://aka.ms/csspeech/voicenames)
Returns:
dict: A dictionary of voice attributes.
@ -29,7 +27,8 @@ async def list_voices():
"Authority": "speech.platform.bing.com",
"Sec-CH-UA": '" Not;A Brand";v="99", "Microsoft Edge";v="91", "Chromium";v="91"',
"Sec-CH-UA-Mobile": "?0",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
"Accept": "*/*",
"Sec-Fetch-Site": "none",
"Sec-Fetch-Mode": "cors",

View File

@ -1,29 +1,68 @@
"""
SubMaker package for the Edge TTS project.
SubMaker is a package that makes the process of creating subtitles with
information provided by the service easier.
"""
import math
from xml.sax.saxutils import escape
def formatter(offset1, offset2, subdata):
data = (
"""
formatter returns the timecode and the text of the subtitle.
"""
return (
f"{mktimestamp(offset1)} --> {mktimestamp(offset2)}\r\n"
f"{escape(subdata)}\r\n\r\n"
)
return data
def mktimestamp(time_unit):
hour = math.floor(time_unit / 10000 / 1000 / 3600)
minute = math.floor((time_unit / 10000 / 1000 / 60) % 60)
seconds = (time_unit / 10000 / 1000) % 60
"""
mktimestamp returns the timecode of the subtitle.
The timecode is in the format of 00:00:00.000.
Returns:
str: The timecode of the subtitle.
"""
hour = math.floor(time_unit / 10 ** 7 / 3600)
minute = math.floor((time_unit / 10 ** 7 / 60) % 60)
seconds = (time_unit / 10 ** 7) % 60
return f"{hour:02d}:{minute:02d}:{seconds:06.3f}"
class SubMaker:
"""
SubMaker class
"""
def __init__(self, overlapping=5):
"""
SubMaker constructor.
Args:
overlapping (int): The amount of time in seconds that the
subtitles should overlap.
"""
self.subs_and_offset = []
self.broken_offset = []
self.overlapping = overlapping * (10 ** 7)
def create_sub(self, timestamp, text):
"""
create_sub creates a subtitle with the given timestamp and text
and adds it to the list of subtitles
Args:
timestamp (int): The timestamp of the subtitle.
text (str): The text of the subtitle.
Returns:
None
"""
if len(self.subs_and_offset) >= 2:
if self.subs_and_offset[-2] >= timestamp + sum(self.broken_offset):
self.broken_offset.append(self.subs_and_offset[-2])
@ -33,6 +72,12 @@ class SubMaker:
self.subs_and_offset.append(text)
def generate_subs(self):
"""
generate_subs generates the complete subtitle file.
Returns:
str: The complete subtitle file.
"""
if len(self.subs_and_offset) >= 2:
data = "WEBVTT\r\n\r\n"
old_time_stamp = None

View File

@ -7,7 +7,54 @@ import argparse
import asyncio
import sys
from edgeTTS import Communicate, SubMaker, list_voices
from edge_tts import Communicate, SubMaker, list_voices
async def _list_voices():
    """
    Print every voice returned by list_voices() as "key: value" lines.

    Voices are separated by a blank line, and the SuggestedCodec,
    FriendlyName and Status attributes are left out of the output.
    """
    hidden = ("SuggestedCodec", "FriendlyName", "Status")
    voices = await list_voices()
    for position, voice in enumerate(voices):
        # Blank separator line before every voice except the first.
        if position > 0:
            print()
        for name, value in voice.items():
            if name not in hidden:
                print(f"{name}: {value}")
async def _tts(args):
    """
    Run text-to-speech for the parsed command line arguments.

    Audio data is written to the file named by args.write_media, or to
    stdout when that option is not set. Subtitles are written to the file
    named by args.write_subtitles, or to stderr when that option is not set.

    Args:
        args: The parsed command line arguments (argparse namespace).

    Returns:
        None
    """
    tts = Communicate()
    subs = SubMaker(args.overlapping)

    # The media file is optional, so it cannot be a plain ``with`` block;
    # the try/finally below guarantees it is closed even if the stream fails.
    media_file = None
    if args.write_media:
        media_file = open(args.write_media, "wb")  # pylint: disable=consider-using-with

    try:
        async for i in tts.run(
            args.text,
            args.enable_sentence_boundary,
            args.enable_word_boundary,
            args.codec,
            args.voice,
            args.pitch,
            args.rate,
            args.volume,
            customspeak=args.custom_ssml,
        ):
            if i[2] is not None:
                # Third element carries the binary payload written as media.
                if media_file is None:
                    sys.stdout.buffer.write(i[2])
                else:
                    media_file.write(i[2])
            elif i[0] is not None and i[1] is not None:
                # First two elements are handed to SubMaker as (timestamp, text).
                subs.create_sub(i[0], i[1])
    finally:
        if media_file is not None:
            media_file.close()

    if not args.write_subtitles:
        sys.stderr.write(subs.generate_subs())
    else:
        with open(args.write_subtitles, "w", encoding="utf-8") as file:
            file.write(subs.generate_subs())
async def _main():
@ -24,19 +71,23 @@ async def _main():
parser.add_argument(
"-v",
"--voice",
help="voice for TTS. Default: Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
help="voice for TTS. "
"Default: Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
default="Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
)
parser.add_argument(
"-c",
"--codec",
help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. Another choice is webm-24khz-16bit-mono-opus. For more info check https://bit.ly/2T33h6S",
help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. "
"Another choice is webm-24khz-16bit-mono-opus. "
"For more info check https://bit.ly/2T33h6S",
default="audio-24khz-48kbitrate-mono-mp3",
)
group.add_argument(
"-l",
"--list-voices",
help="lists available voices. Edge's list is incomplete so check https://bit.ly/2SFq1d3",
help="lists available voices. "
"Edge's list is incomplete so check https://bit.ly/2SFq1d3",
action="store_true",
)
parser.add_argument(
@ -85,6 +136,10 @@ async def _main():
)
args = parser.parse_args()
if args.list_voices:
await _list_voices()
sys.exit(0)
if args.text is not None or args.file is not None:
if args.file is not None:
# we need to use sys.stdin.read() because some devices
@ -96,45 +151,8 @@ async def _main():
# logger.debug("reading from %s" % args.file)
with open(args.file, "r", encoding="utf-8") as file:
args.text = file.read()
tts = Communicate()
subs = SubMaker(args.overlapping)
if args.write_media:
media_file = open(args.write_media, "wb")
async for i in tts.run(
args.text,
args.enable_sentence_boundary,
args.enable_word_boundary,
args.codec,
args.voice,
args.pitch,
args.rate,
args.volume,
customspeak=args.custom_ssml,
):
if i[2] is not None:
if not args.write_media:
sys.stdout.buffer.write(i[2])
else:
media_file.write(i[2])
elif i[0] is not None and i[1] is not None:
subs.create_sub(i[0], i[1])
if args.write_media:
media_file.close()
if not args.write_subtitles:
sys.stderr.write(subs.generate_subs())
else:
with open(args.write_subtitles, "w", encoding="utf-8") as file:
file.write(subs.generate_subs())
elif args.list_voices:
for idx, voice in enumerate(await list_voices()):
if idx != 0:
print()
for key in voice.keys():
if key in ["SuggestedCodec", "FriendlyName", "Status"]:
continue
# print ("%s: %s" % ("Name" if key == "ShortName" else key, voice[key]))
print(f"{key}: {voice[key]}")
await _tts(args)
def main():