Update to edge-tts 4.0.0

2024-11-22 01:45:02 +00:00 · 2021-12-07 22:09:43 +02:00 · 2021-12-07 22:09:43 +02:00 · 4fcecddaf0
commit 4fcecddaf0
parent 756766fe6e
16 changed files with 207 additions and 101 deletions
--- a/build.sh
+++ b/build.sh
@ -0,0 +1,3 @@
+#!/bin/sh
+set -x
+exec python3 setup.py sdist bdist_wheel
--- a/build_and_publish.sh
+++ b/build_and_publish.sh
@ -1,6 +1,12 @@
 #!/bin/sh
-set -e
-rm -rf build dist src/*.egg-info
-python3 setup.py sdist bdist_wheel
-twine upload dist/*
-rm -rf build dist src/*.egg-info
+
+set -ex
+
+./clean.sh
+
+./build.sh
+./publish.sh
+
+./clean.sh
+
+exit 0
--- a/clean.sh
+++ b/clean.sh
@ -0,0 +1,3 @@
+#!/bin/sh
+set -x
+exec rm -rf build dist src/*.egg-info
--- a/examples/input_example.py
+++ b/examples/input_example.py
@ -1,21 +1,27 @@
 #!/usr/bin/env python3
-# Example Python script that shows how to use edge-tts as a module
+"""
+Example Python script that shows how to use edge-tts as a module
+"""
+
 import asyncio
 import tempfile

 from playsound import playsound

-import edgeTTS
+import edge_tts


 async def main():
-    communicate = edgeTTS.Communicate()
+    """
+    Main function
+    """
+    communicate = edge_tts.Communicate()
    ask = input("What do you want TTS to say? ")
-    with tempfile.NamedTemporaryFile() as fp:
+    with tempfile.NamedTemporaryFile() as temporary_file:
        async for i in communicate.run(ask):
            if i[2] is not None:
-                fp.write(i[2])
-        playsound(fp.name)
+                temporary_file.write(i[2])
+        playsound(temporary_file.name)


 if __name__ == "__main__":
--- a/publish.sh
+++ b/publish.sh
@ -0,0 +1,3 @@
+#!/bin/sh
+set -x
+exec twine upload dist/*
--- a/setup.cfg
+++ b/setup.cfg
@ -1,6 +1,6 @@
 [metadata]
 name = edge-tts
-version = 3.0.2
+version = 4.0.0
 author = rany
 author_email = ranygh@riseup.net
 description = Microsoft Edge's TTS
@ -27,5 +27,5 @@ where=src
   
 [options.entry_points]
 console_scripts =
-    edge-tts = edgeTTS.__main__:main
-    edge-playback = edgePlayback.__init__:main
+    edge-tts = edge_tts.__main__:main
+    edge-playback = edge_playback.__init__:main
--- a/src/edgePlayback/main.py
+++ b/src/edgePlayback/main.py
@ -1,6 +0,0 @@
-#!/usr/bin/env python3
-
-from edgePlayback.__init__ import *
-
-if __name__ == "__main__":
-    main()
--- a/src/edge_playback/init.py
+++ b/src/edge_playback/init.py
@ -1,5 +1,9 @@
 #!/usr/bin/env python3

+"""
+Playback TTS with subtitles using edge-tts and mpv.
+"""
+
 import subprocess
 import sys
 import tempfile
@ -7,13 +11,16 @@ from shutil import which


 def main():
+    """
+    Main function.
+    """
    if which("mpv") and which("edge-tts"):
        with tempfile.NamedTemporaryFile() as media:
            with tempfile.NamedTemporaryFile() as subtitle:
                print()
-                print("Media file      %s" % media.name)
-                print("Subtitle file   %s\n" % subtitle.name)
-                p = subprocess.Popen(
+                print(f"Media file      {media.name}")
+                print(f"Subtitle file   {subtitle.name}\n")
+                with subprocess.Popen(
                    [
                        "edge-tts",
                        "-w",
@ -23,17 +30,18 @@ def main():
                        subtitle.name,
                    ]
                    + sys.argv[1:]
-                )
-                p.communicate()
-                p = subprocess.Popen(
+                ) as process:
+                    process.communicate()
+
+                with subprocess.Popen(
                    [
                        "mpv",
                        "--keep-open=yes",
-                        "--sub-file=" + subtitle.name,
+                        f"--sub-file={subtitle.name}",
                        media.name,
                    ]
-                )
-                p.communicate()
+                ) as process:
+                    process.communicate()
    else:
        print("This script requires mpv and edge-tts.")

--- a/src/edge_playback/main.py
+++ b/src/edge_playback/main.py
@ -0,0 +1,10 @@
+#!/usr/bin/env python3
+
+"""
+This is the main file for the edge_playback package.
+"""
+
+from edge_playback.__init__ import main
+
+if __name__ == "__main__":
+    main()
--- a/src/edge_tts/init.py
+++ b/src/edge_tts/init.py
@ -1,3 +1,7 @@
+"""
+edge_tts package: exposes Communicate, list_voices and SubMaker.
+"""
+
 from .communicate import Communicate
 from .list_voices import list_voices
 from .submaker import SubMaker
--- a/src/edge_tts/main.py
+++ b/src/edge_tts/main.py
@ -1,3 +1,7 @@
+"""
+__main__ for edge_tts.
+"""
+
 from .util import main

 if __name__ == "__main__":
--- a/src/edge_tts/communicate.py
+++ b/src/edge_tts/communicate.py
@ -53,17 +53,14 @@ def remove_incompatible_characters(string):
    if isinstance(string, bytes):
        string = string.decode("utf-8")

-    cleaned_string = ""
-    for character in string:
-        character_code = ord(character)
-        if (
-            (0 <= character_code <= 8)
-            or (11 <= character_code <= 12)
-            or (14 <= character_code <= 31)
-        ):
-            character = " "
-        cleaned_string += character
-    return cleaned_string
+    string = list(string)
+
+    for idx in range(len(string)):  # pylint: disable=consider-using-enumerate
+        code = ord(string[idx])
+        if (0 <= code <= 8) or (11 <= code <= 12) or (14 <= code <= 31):
+            string[idx] = " "
+
+    return "".join(string)


 def connect_id():
@ -144,7 +141,8 @@ def mkssml(text, voice, pitch, rate, volume):

    ssml = (
        "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>"
-        f"<voice name='{voice}'><prosody pitch='{pitch}' rate='{rate}' volume='{volume}'>{text}</prosody></voice></speak>"
+        f"<voice name='{voice}'><prosody pitch='{pitch}' rate='{rate}' volume='{volume}'>"
+        f"{text}</prosody></voice></speak>"
    )
    return ssml

@ -192,7 +190,7 @@ def ssml_headers_plus_data(request_id, timestamp, ssml):
    )


-class Communicate:
+class Communicate:  # pylint: disable=too-few-public-methods
    """
    Class for communicating with the service.
    """
@ -214,7 +212,7 @@ class Communicate:
        rate="+0%",
        volume="+0%",
        customspeak=False,
-    ):
+    ):  # pylint: disable=too-many-arguments, too-many-locals
        """
        Runs the Communicate class.

@ -266,7 +264,8 @@ class Communicate:
                    "Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
                    "Accept-Encoding": "gzip, deflate, br",
                    "Accept-Language": "en-US,en;q=0.9",
-                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
+                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+                    " (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
                },
            ) as websocket:
                for message in messages:
@ -275,18 +274,22 @@ class Communicate:

                    # Prepare the request to be sent to the service.
                    #
-                    # Note that sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed
-                    # to be booleans, but Edge Browser seems to send them as strings and not booleans.
-                    # This is a bug in Edge Browser as Azure Cognitive Services actually sends them as
-                    # booleans and not strings. For now I will send them as booleans unless it causes
+                    # Note that sentenceBoundaryEnabled and wordBoundaryEnabled are supposed
+                    # to be booleans, but the Edge browser seems to send them as strings.
+                    #
+                    # This is a bug in Edge, as Azure Cognitive Services itself sends them as
+                    # booleans, not strings. For now I send them as booleans unless that causes
                    # any problems.
                    #
-                    # Also pay close attention to double {  } in request (escape for Python .format()).
+                    # Also pay close attention to double { } in request (escape for f-string).
                    request = (
                        f"X-Timestamp:{self.date}\r\n"
                        "Content-Type:application/json; charset=utf-8\r\n"
                        "Path:speech.config\r\n\r\n"
-                        f'{{"context":{{"synthesis":{{"audio":{{"metadataoptions":{{"sentenceBoundaryEnabled":{sentence_boundary},"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"}}}}}}}}\r\n'
+                        '{"context":{"synthesis":{"audio":{"metadataoptions":{'
+                        f'"sentenceBoundaryEnabled":{sentence_boundary},'
+                        f'"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"'
+                        "}}}}\r\n"
                    )
                    # Send the request to the service.
                    await websocket.send_str(request)
--- a/src/edge_tts/constants.py
+++ b/src/edge_tts/constants.py
--- a/src/edge_tts/list_voices.py
+++ b/src/edge_tts/list_voices.py
@ -15,9 +15,7 @@ async def list_voices():

    This pulls data from the URL used by Microsoft Edge to return a list of
    all available voices. However many more experimental voices are available
-    than are listed here.
-    (See
-    https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)
+    than are listed here. (See https://aka.ms/csspeech/voicenames)

    Returns:
        dict: A dictionary of voice attributes.
@ -29,7 +27,8 @@ async def list_voices():
                "Authority": "speech.platform.bing.com",
                "Sec-CH-UA": '" Not;A Brand";v="99", "Microsoft Edge";v="91", "Chromium";v="91"',
                "Sec-CH-UA-Mobile": "?0",
-                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+                "(KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
                "Accept": "*/*",
                "Sec-Fetch-Site": "none",
                "Sec-Fetch-Mode": "cors",
--- a/src/edge_tts/submaker.py
+++ b/src/edge_tts/submaker.py
@ -1,29 +1,68 @@
+"""
+SubMaker module for the Edge TTS project.
+
+SubMaker makes it easier to create subtitles from the information
+provided by the service.
+"""
+
 import math
 from xml.sax.saxutils import escape


 def formatter(offset1, offset2, subdata):
-    data = (
+    """
+    formatter returns the timecode and the text of the subtitle.
+    """
+    return (
        f"{mktimestamp(offset1)} --> {mktimestamp(offset2)}\r\n"
        f"{escape(subdata)}\r\n\r\n"
    )
-    return data


 def mktimestamp(time_unit):
-    hour = math.floor(time_unit / 10000 / 1000 / 3600)
-    minute = math.floor((time_unit / 10000 / 1000 / 60) % 60)
-    seconds = (time_unit / 10000 / 1000) % 60
+    """
+    mktimestamp returns the timecode of the subtitle.
+
+    The timecode is in the format of 00:00:00.000.
+
+    Returns:
+        str: The timecode of the subtitle.
+    """
+    hour = math.floor(time_unit / 10 ** 7 / 3600)
+    minute = math.floor((time_unit / 10 ** 7 / 60) % 60)
+    seconds = (time_unit / 10 ** 7) % 60
    return f"{hour:02d}:{minute:02d}:{seconds:06.3f}"


 class SubMaker:
+    """
+    SubMaker class
+    """
+
    def __init__(self, overlapping=5):
+        """
+        SubMaker constructor.
+
+        Args:
+            overlapping (int): The amount of time in seconds that the
+                               subtitles should overlap.
+        """
        self.subs_and_offset = []
        self.broken_offset = []
        self.overlapping = overlapping * (10 ** 7)

    def create_sub(self, timestamp, text):
+        """
+        create_sub creates a subtitle with the given timestamp and text
+        and adds it to the list of subtitles.
+
+        Args:
+            timestamp (int): The timestamp of the subtitle.
+            text (str): The text of the subtitle.
+
+        Returns:
+            None
+        """
        if len(self.subs_and_offset) >= 2:
            if self.subs_and_offset[-2] >= timestamp + sum(self.broken_offset):
                self.broken_offset.append(self.subs_and_offset[-2])
@ -33,6 +72,12 @@ class SubMaker:
        self.subs_and_offset.append(text)

    def generate_subs(self):
+        """
+        generate_subs generates the complete subtitle file.
+
+        Returns:
+            str: The complete subtitle file.
+        """
        if len(self.subs_and_offset) >= 2:
            data = "WEBVTT\r\n\r\n"
            old_time_stamp = None
--- a/src/edge_tts/util.py
+++ b/src/edge_tts/util.py
@ -7,7 +7,54 @@ import argparse
 import asyncio
 import sys

-from edgeTTS import Communicate, SubMaker, list_voices
+from edge_tts import Communicate, SubMaker, list_voices
+
+
+async def _list_voices():
+    """
+    List available voices.
+    """
+    for idx, voice in enumerate(await list_voices()):
+        if idx != 0:
+            print()
+
+        for key in voice.keys():
+            if key in ["SuggestedCodec", "FriendlyName", "Status"]:
+                continue
+            # print ("%s: %s" % ("Name" if key == "ShortName" else key, voice[key]))
+            print(f"{key}: {voice[key]}")
+
+
+async def _tts(args):
+    tts = Communicate()
+    subs = SubMaker(args.overlapping)
+    if args.write_media:
+        media_file = open(args.write_media, "wb")  # pylint: disable=consider-using-with
+    async for i in tts.run(
+        args.text,
+        args.enable_sentence_boundary,
+        args.enable_word_boundary,
+        args.codec,
+        args.voice,
+        args.pitch,
+        args.rate,
+        args.volume,
+        customspeak=args.custom_ssml,
+    ):
+        if i[2] is not None:
+            if not args.write_media:
+                sys.stdout.buffer.write(i[2])
+            else:
+                media_file.write(i[2])
+        elif i[0] is not None and i[1] is not None:
+            subs.create_sub(i[0], i[1])
+    if args.write_media:
+        media_file.close()
+    if not args.write_subtitles:
+        sys.stderr.write(subs.generate_subs())
+    else:
+        with open(args.write_subtitles, "w", encoding="utf-8") as file:
+            file.write(subs.generate_subs())


 async def _main():
@ -24,19 +71,23 @@ async def _main():
    parser.add_argument(
        "-v",
        "--voice",
-        help="voice for TTS. Default: Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
+        help="voice for TTS. "
+        "Default: Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
        default="Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
    )
    parser.add_argument(
        "-c",
        "--codec",
-        help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. Another choice is webm-24khz-16bit-mono-opus. For more info check https://bit.ly/2T33h6S",
+        help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. "
+        "Another choice is webm-24khz-16bit-mono-opus. "
+        "For more info check https://bit.ly/2T33h6S",
        default="audio-24khz-48kbitrate-mono-mp3",
    )
    group.add_argument(
        "-l",
        "--list-voices",
-        help="lists available voices. Edge's list is incomplete so check https://bit.ly/2SFq1d3",
+        help="lists available voices. "
+        "Edge's list is incomplete so check https://bit.ly/2SFq1d3",
        action="store_true",
    )
    parser.add_argument(
@ -85,6 +136,10 @@ async def _main():
    )
    args = parser.parse_args()

+    if args.list_voices:
+        await _list_voices()
+        sys.exit(0)
+
    if args.text is not None or args.file is not None:
        if args.file is not None:
            # we need to use sys.stdin.read() because some devices
@ -96,45 +151,8 @@ async def _main():
                # logger.debug("reading from %s" % args.file)
                with open(args.file, "r", encoding="utf-8") as file:
                    args.text = file.read()
-        tts = Communicate()
-        subs = SubMaker(args.overlapping)
-        if args.write_media:
-            media_file = open(args.write_media, "wb")
-        async for i in tts.run(
-            args.text,
-            args.enable_sentence_boundary,
-            args.enable_word_boundary,
-            args.codec,
-            args.voice,
-            args.pitch,
-            args.rate,
-            args.volume,
-            customspeak=args.custom_ssml,
-        ):
-            if i[2] is not None:
-                if not args.write_media:
-                    sys.stdout.buffer.write(i[2])
-                else:
-                    media_file.write(i[2])
-            elif i[0] is not None and i[1] is not None:
-                subs.create_sub(i[0], i[1])
-        if args.write_media:
-            media_file.close()
-        if not args.write_subtitles:
-            sys.stderr.write(subs.generate_subs())
-        else:
-            with open(args.write_subtitles, "w", encoding="utf-8") as file:
-                file.write(subs.generate_subs())
-    elif args.list_voices:
-        for idx, voice in enumerate(await list_voices()):
-            if idx != 0:
-                print()

-            for key in voice.keys():
-                if key in ["SuggestedCodec", "FriendlyName", "Status"]:
-                    continue
-                # print ("%s: %s" % ("Name" if key == "ShortName" else key, voice[key]))
-                print(f"{key}: {voice[key]}")
+        await _tts(args)


 def main():