From 4fcecddaf0e32957927f0ab92c4ac86fef57b1d5 Mon Sep 17 00:00:00 2001 From: rany Date: Tue, 7 Dec 2021 22:09:43 +0200 Subject: [PATCH] Update to edge-tts 4.0.0 --- build.sh | 3 + build_and_publish.sh | 16 ++- clean.sh | 3 + examples/input_example.py | 18 ++-- publish.sh | 3 + setup.cfg | 6 +- src/edgePlayback/__main__.py | 6 -- .../__init__.py | 26 +++-- src/edge_playback/__main__.py | 10 ++ src/{edgeTTS => edge_tts}/__init__.py | 4 + src/{edgeTTS => edge_tts}/__main__.py | 4 + src/{edgeTTS => edge_tts}/communicate.py | 45 ++++---- src/{edgeTTS => edge_tts}/constants.py | 0 src/{edgeTTS => edge_tts}/list_voices.py | 7 +- src/{edgeTTS => edge_tts}/submaker.py | 55 +++++++++- src/{edgeTTS => edge_tts}/util.py | 102 ++++++++++-------- 16 files changed, 207 insertions(+), 101 deletions(-) create mode 100755 build.sh create mode 100755 clean.sh create mode 100755 publish.sh delete mode 100755 src/edgePlayback/__main__.py rename src/{edgePlayback => edge_playback}/__init__.py (63%) mode change 100755 => 100644 create mode 100644 src/edge_playback/__main__.py rename src/{edgeTTS => edge_tts}/__init__.py (77%) rename src/{edgeTTS => edge_tts}/__main__.py (65%) rename src/{edgeTTS => edge_tts}/communicate.py (89%) rename src/{edgeTTS => edge_tts}/constants.py (100%) rename src/{edgeTTS => edge_tts}/list_voices.py (83%) rename src/{edgeTTS => edge_tts}/submaker.py (55%) rename src/{edgeTTS => edge_tts}/util.py (60%) diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..9e826c6 --- /dev/null +++ b/build.sh @@ -0,0 +1,3 @@ +#!/bin/sh +set -x +exec python3 setup.py sdist bdist_wheel diff --git a/build_and_publish.sh b/build_and_publish.sh index 69c8646..492747e 100755 --- a/build_and_publish.sh +++ b/build_and_publish.sh @@ -1,6 +1,12 @@ #!/bin/sh -set -e -rm -rf build dist src/*.egg-info -python3 setup.py sdist bdist_wheel -twine upload dist/* -rm -rf build dist src/*.egg-info + +set -ex + +./clean.sh + +./build.sh +./publish.sh + +./clean.sh + +exit 0 diff --git a/clean.sh b/clean.sh new file mode 100755 index 0000000..7be66bb --- /dev/null +++ b/clean.sh @@ -0,0 +1,3 @@ +#!/bin/sh +set -x +exec rm -rf build dist src/*.egg-info diff --git a/examples/input_example.py b/examples/input_example.py index dac53f6..f5a9fa4 100755 --- a/examples/input_example.py +++ b/examples/input_example.py @@ -1,21 +1,27 @@ #!/usr/bin/env python3 -# Example Python script that shows how to use edge-tts as a module +""" +Example Python script that shows how to use edge-tts as a module +""" + import asyncio import tempfile from playsound import playsound -import edgeTTS +import edge_tts async def main(): - communicate = edgeTTS.Communicate() + """ + Main function + """ + communicate = edge_tts.Communicate() ask = input("What do you want TTS to say? ") - with tempfile.NamedTemporaryFile() as fp: + with tempfile.NamedTemporaryFile() as temporary_file: async for i in communicate.run(ask): if i[2] is not None: - fp.write(i[2]) - playsound(fp.name) + temporary_file.write(i[2]) + playsound(temporary_file.name) if __name__ == "__main__": diff --git a/publish.sh b/publish.sh new file mode 100755 index 0000000..2841174 --- /dev/null +++ b/publish.sh @@ -0,0 +1,3 @@ +#!/bin/sh +set -x +exec twine upload dist/* diff --git a/setup.cfg b/setup.cfg index cfb3865..d2415a3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = edge-tts -version = 3.0.2 +version = 4.0.0 author = rany author_email = ranygh@riseup.net description = Microsoft Edge's TTS @@ -27,5 +27,5 @@ where=src [options.entry_points] console_scripts = - edge-tts = edgeTTS.__main__:main - edge-playback = edgePlayback.__init__:main + edge-tts = edge_tts.__main__:main + edge-playback = edge_playback.__init__:main diff --git a/src/edgePlayback/__main__.py b/src/edgePlayback/__main__.py deleted file mode 100755 index 8365b7f..0000000 --- a/src/edgePlayback/__main__.py +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env python3 - -from edgePlayback.__init__ import * - -if __name__ == "__main__": - main() diff --git a/src/edgePlayback/__init__.py b/src/edge_playback/__init__.py old mode 100755 new mode 100644 similarity index 63% rename from src/edgePlayback/__init__.py rename to src/edge_playback/__init__.py index f31731f..45c8716 --- a/src/edgePlayback/__init__.py +++ b/src/edge_playback/__init__.py @@ -1,5 +1,9 @@ #!/usr/bin/env python3 +""" +Playback TTS with subtitles using edge-tts and mpv. +""" + import subprocess import sys import tempfile @@ -7,13 +11,16 @@ from shutil import which def main(): + """ + Main function. + """ if which("mpv") and which("edge-tts"): with tempfile.NamedTemporaryFile() as media: with tempfile.NamedTemporaryFile() as subtitle: print() - print("Media file %s" % media.name) - print("Subtitle file %s\n" % subtitle.name) - p = subprocess.Popen( + print(f"Media file {media.name}") + print(f"Subtitle file {subtitle.name}\n") + with subprocess.Popen( [ "edge-tts", "-w", @@ -23,17 +30,18 @@ def main(): subtitle.name, ] + sys.argv[1:] - ) - p.communicate() - p = subprocess.Popen( + ) as process: + process.communicate() + + with subprocess.Popen( [ "mpv", "--keep-open=yes", - "--sub-file=" + subtitle.name, + f"--sub-file={subtitle.name}", media.name, ] - ) - p.communicate() + ) as process: + process.communicate() else: print("This script requires mpv and edge-tts.") diff --git a/src/edge_playback/__main__.py b/src/edge_playback/__main__.py new file mode 100644 index 0000000..2ac8c12 --- /dev/null +++ b/src/edge_playback/__main__.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python3 + +""" +This is the main file for the edge_playback package. +""" + +from edge_playback.__init__ import main + +if __name__ == "__main__": + main() diff --git a/src/edgeTTS/__init__.py b/src/edge_tts/__init__.py similarity index 77% rename from src/edgeTTS/__init__.py rename to src/edge_tts/__init__.py index 927b13c..3880896 100644 --- a/src/edgeTTS/__init__.py +++ b/src/edge_tts/__init__.py @@ -1,3 +1,7 @@ +""" +__init__ for edge_tts +""" + from .communicate import Communicate from .list_voices import list_voices from .submaker import SubMaker diff --git a/src/edgeTTS/__main__.py b/src/edge_tts/__main__.py similarity index 65% rename from src/edgeTTS/__main__.py rename to src/edge_tts/__main__.py index e5e2737..b36f114 100644 --- a/src/edgeTTS/__main__.py +++ b/src/edge_tts/__main__.py @@ -1,3 +1,7 @@ +""" +__main__ for edge_tts. +""" + from .util import main if __name__ == "__main__": diff --git a/src/edgeTTS/communicate.py b/src/edge_tts/communicate.py similarity index 89% rename from src/edgeTTS/communicate.py rename to src/edge_tts/communicate.py index 1021553..6f9f7fb 100644 --- a/src/edgeTTS/communicate.py +++ b/src/edge_tts/communicate.py @@ -53,17 +53,14 @@ def remove_incompatible_characters(string): if isinstance(string, bytes): string = string.decode("utf-8") - cleaned_string = "" - for character in string: - character_code = ord(character) - if ( - (0 <= character_code <= 8) - or (11 <= character_code <= 12) - or (14 <= character_code <= 31) - ): - character = " " - cleaned_string += character - return cleaned_string + string = list(string) + + for idx in range(len(string)): # pylint: disable=consider-using-enumerate + code = ord(string[idx]) + if (0 <= code <= 8) or (11 <= code <= 12) or (14 <= code <= 31): + string[idx] = " " + + return "".join(string) def connect_id(): @@ -144,7 +141,8 @@ def mkssml(text, voice, pitch, rate, volume): ssml = ( "" - f"{text}" + f"" + f"{text}" ) return ssml @@ -192,7 +190,7 @@ def ssml_headers_plus_data(request_id, timestamp, ssml): ) -class Communicate: +class Communicate: # pylint: disable=too-few-public-methods """ Class for communicating with the service. """ @@ -214,7 +212,7 @@ class Communicate: rate="+0%", volume="+0%", customspeak=False, - ): + ): # pylint: disable=too-many-arguments, too-many-locals """ Runs the Communicate class. @@ -266,7 +264,8 @@ class Communicate: "Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold", "Accept-Encoding": "gzip, deflate, br", "Accept-Language": "en-US,en;q=0.9", - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + " (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41", }, ) as websocket: for message in messages: @@ -275,18 +274,22 @@ class Communicate: # Prepare the request to be sent to the service. # - # Note that sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed - # to be booleans, but Edge Browser seems to send them as strings and not booleans. - # This is a bug in Edge Browser as Azure Cognitive Services actually sends them as - # booleans and not strings. For now I will send them as booleans unless it causes + # Note sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed + # to be booleans, but Edge Browser seems to send them as strings. + # + # This is a bug in Edge as Azure Cognitive Services actually sends them as + # bool and not string. For now I will send them as bool unless it causes # any problems. # - # Also pay close attention to double { } in request (escape for Python .format()). + # Also pay close attention to double { } in request (escape for f-string). request = ( f"X-Timestamp:{self.date}\r\n" "Content-Type:application/json; charset=utf-8\r\n" "Path:speech.config\r\n\r\n" - f'{{"context":{{"synthesis":{{"audio":{{"metadataoptions":{{"sentenceBoundaryEnabled":{sentence_boundary},"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"}}}}}}}}\r\n' + '{"context":{"synthesis":{"audio":{"metadataoptions":{' + f'"sentenceBoundaryEnabled":{sentence_boundary},' + f'"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"' + "}}}}\r\n" ) # Send the request to the service. await websocket.send_str(request) diff --git a/src/edgeTTS/constants.py b/src/edge_tts/constants.py similarity index 100% rename from src/edgeTTS/constants.py rename to src/edge_tts/constants.py diff --git a/src/edgeTTS/list_voices.py b/src/edge_tts/list_voices.py similarity index 83% rename from src/edgeTTS/list_voices.py rename to src/edge_tts/list_voices.py index 0fe6091..8d42370 100644 --- a/src/edgeTTS/list_voices.py +++ b/src/edge_tts/list_voices.py @@ -15,9 +15,7 @@ async def list_voices(): This pulls data from the URL used by Microsoft Edge to return a list of all available voices. However many more experimental voices are available - than are listed here. - (See - https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support) + than are listed here. (See https://aka.ms/csspeech/voicenames) Returns: dict: A dictionary of voice attributes. @@ -29,7 +27,8 @@ async def list_voices(): "Authority": "speech.platform.bing.com", "Sec-CH-UA": '" Not;A Brand";v="99", "Microsoft Edge";v="91", "Chromium";v="91"', "Sec-CH-UA-Mobile": "?0", - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41", "Accept": "*/*", "Sec-Fetch-Site": "none", "Sec-Fetch-Mode": "cors", diff --git a/src/edgeTTS/submaker.py b/src/edge_tts/submaker.py similarity index 55% rename from src/edgeTTS/submaker.py rename to src/edge_tts/submaker.py index 44d7e92..fe80b61 100644 --- a/src/edgeTTS/submaker.py +++ b/src/edge_tts/submaker.py @@ -1,29 +1,68 @@ +""" +SubMaker package for the Edge TTS project. + +SubMaker is a package that makes the process of creating subtitles with +information provided by the service easier. +""" + import math from xml.sax.saxutils import escape def formatter(offset1, offset2, subdata): - data = ( + """ + formatter returns the timecode and the text of the subtitle. + """ + return ( f"{mktimestamp(offset1)} --> {mktimestamp(offset2)}\r\n" f"{escape(subdata)}\r\n\r\n" ) - return data def mktimestamp(time_unit): - hour = math.floor(time_unit / 10000 / 1000 / 3600) - minute = math.floor((time_unit / 10000 / 1000 / 60) % 60) - seconds = (time_unit / 10000 / 1000) % 60 + """ + mktimestamp returns the timecode of the subtitle. + + The timecode is in the format of 00:00:00.000. + + Returns: + str: The timecode of the subtitle. + """ + hour = math.floor(time_unit / 10 ** 7 / 3600) + minute = math.floor((time_unit / 10 ** 7 / 60) % 60) + seconds = (time_unit / 10 ** 7) % 60 return f"{hour:02d}:{minute:02d}:{seconds:06.3f}" class SubMaker: + """ + SubMaker class + """ + def __init__(self, overlapping=5): + """ + SubMaker constructor. + + Args: + overlapping (int): The amount of time in seconds that the + subtitles should overlap. + """ self.subs_and_offset = [] self.broken_offset = [] self.overlapping = overlapping * (10 ** 7) def create_sub(self, timestamp, text): + """ + create_sub creates a subtitle with the given timestamp and text + and adds it to the list of subtitles + + Args: + timestamp (int): The timestamp of the subtitle. + text (str): The text of the subtitle. + + Returns: + None + """ if len(self.subs_and_offset) >= 2: if self.subs_and_offset[-2] >= timestamp + sum(self.broken_offset): self.broken_offset.append(self.subs_and_offset[-2]) @@ -33,6 +72,12 @@ class SubMaker: self.subs_and_offset.append(text) def generate_subs(self): + """ + generate_subs generates the complete subtitle file. + + Returns: + str: The complete subtitle file. + """ if len(self.subs_and_offset) >= 2: data = "WEBVTT\r\n\r\n" old_time_stamp = None diff --git a/src/edgeTTS/util.py b/src/edge_tts/util.py similarity index 60% rename from src/edgeTTS/util.py rename to src/edge_tts/util.py index 68c7ff3..8d64863 100644 --- a/src/edgeTTS/util.py +++ b/src/edge_tts/util.py @@ -7,7 +7,54 @@ import argparse import asyncio import sys -from edgeTTS import Communicate, SubMaker, list_voices +from edge_tts import Communicate, SubMaker, list_voices + + +async def _list_voices(): + """ + List available voices. + """ + for idx, voice in enumerate(await list_voices()): + if idx != 0: + print() + + for key in voice.keys(): + if key in ["SuggestedCodec", "FriendlyName", "Status"]: + continue + # print ("%s: %s" % ("Name" if key == "ShortName" else key, voice[key])) + print(f"{key}: {voice[key]}") + + +async def _tts(args): + tts = Communicate() + subs = SubMaker(args.overlapping) + if args.write_media: + media_file = open(args.write_media, "wb") # pylint: disable=consider-using-with + async for i in tts.run( + args.text, + args.enable_sentence_boundary, + args.enable_word_boundary, + args.codec, + args.voice, + args.pitch, + args.rate, + args.volume, + customspeak=args.custom_ssml, + ): + if i[2] is not None: + if not args.write_media: + sys.stdout.buffer.write(i[2]) + else: + media_file.write(i[2]) + elif i[0] is not None and i[1] is not None: + subs.create_sub(i[0], i[1]) + if args.write_media: + media_file.close() + if not args.write_subtitles: + sys.stderr.write(subs.generate_subs()) + else: + with open(args.write_subtitles, "w", encoding="utf-8") as file: + file.write(subs.generate_subs()) async def _main(): @@ -24,19 +71,23 @@ async def _main(): parser.add_argument( "-v", "--voice", - help="voice for TTS. Default: Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)", + help="voice for TTS. " + "Default: Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)", default="Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)", ) parser.add_argument( "-c", "--codec", - help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. Another choice is webm-24khz-16bit-mono-opus. For more info check https://bit.ly/2T33h6S", + help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. " + "Another choice is webm-24khz-16bit-mono-opus. " + "For more info check https://bit.ly/2T33h6S", default="audio-24khz-48kbitrate-mono-mp3", ) group.add_argument( "-l", "--list-voices", - help="lists available voices. Edge's list is incomplete so check https://bit.ly/2SFq1d3", + help="lists available voices. " + "Edge's list is incomplete so check https://bit.ly/2SFq1d3", action="store_true", ) parser.add_argument( @@ -85,6 +136,10 @@ async def _main(): ) args = parser.parse_args() + if args.list_voices: + await _list_voices() + sys.exit(0) + if args.text is not None or args.file is not None: if args.file is not None: # we need to use sys.stdin.read() because some devices @@ -96,45 +151,8 @@ async def _main(): # logger.debug("reading from %s" % args.file) with open(args.file, "r", encoding="utf-8") as file: args.text = file.read() - tts = Communicate() - subs = SubMaker(args.overlapping) - if args.write_media: - media_file = open(args.write_media, "wb") - async for i in tts.run( - args.text, - args.enable_sentence_boundary, - args.enable_word_boundary, - args.codec, - args.voice, - args.pitch, - args.rate, - args.volume, - customspeak=args.custom_ssml, - ): - if i[2] is not None: - if not args.write_media: - sys.stdout.buffer.write(i[2]) - else: - media_file.write(i[2]) - elif i[0] is not None and i[1] is not None: - subs.create_sub(i[0], i[1]) - if args.write_media: - media_file.close() - if not args.write_subtitles: - sys.stderr.write(subs.generate_subs()) - else: - with open(args.write_subtitles, "w", encoding="utf-8") as file: - file.write(subs.generate_subs()) - elif args.list_voices: - for idx, voice in enumerate(await list_voices()): - if idx != 0: - print() - for key in voice.keys(): - if key in ["SuggestedCodec", "FriendlyName", "Status"]: - continue - # print ("%s: %s" % ("Name" if key == "ShortName" else key, voice[key])) - print(f"{key}: {voice[key]}") + await _tts(args) def main():