diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..9e826c6
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+set -x
+exec python3 setup.py sdist bdist_wheel
diff --git a/build_and_publish.sh b/build_and_publish.sh
index 69c8646..492747e 100755
--- a/build_and_publish.sh
+++ b/build_and_publish.sh
@@ -1,6 +1,12 @@
#!/bin/sh
-set -e
-rm -rf build dist src/*.egg-info
-python3 setup.py sdist bdist_wheel
-twine upload dist/*
-rm -rf build dist src/*.egg-info
+
+set -ex
+
+./clean.sh
+
+./build.sh
+./publish.sh
+
+./clean.sh
+
+exit 0
diff --git a/clean.sh b/clean.sh
new file mode 100755
index 0000000..7be66bb
--- /dev/null
+++ b/clean.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+set -x
+exec rm -rf build dist src/*.egg-info
diff --git a/examples/input_example.py b/examples/input_example.py
index dac53f6..f5a9fa4 100755
--- a/examples/input_example.py
+++ b/examples/input_example.py
@@ -1,21 +1,27 @@
#!/usr/bin/env python3
-# Example Python script that shows how to use edge-tts as a module
+"""
+Example Python script that shows how to use edge-tts as a module
+"""
+
import asyncio
import tempfile
from playsound import playsound
-import edgeTTS
+import edge_tts
async def main():
- communicate = edgeTTS.Communicate()
+ """
+ Main function
+ """
+ communicate = edge_tts.Communicate()
ask = input("What do you want TTS to say? ")
- with tempfile.NamedTemporaryFile() as fp:
+ with tempfile.NamedTemporaryFile() as temporary_file:
async for i in communicate.run(ask):
if i[2] is not None:
- fp.write(i[2])
- playsound(fp.name)
+ temporary_file.write(i[2])
+ playsound(temporary_file.name)
if __name__ == "__main__":
diff --git a/publish.sh b/publish.sh
new file mode 100755
index 0000000..2841174
--- /dev/null
+++ b/publish.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+set -x
+exec twine upload dist/*
diff --git a/setup.cfg b/setup.cfg
index cfb3865..d2415a3 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
[metadata]
name = edge-tts
-version = 3.0.2
+version = 4.0.0
author = rany
author_email = ranygh@riseup.net
description = Microsoft Edge's TTS
@@ -27,5 +27,5 @@ where=src
[options.entry_points]
console_scripts =
- edge-tts = edgeTTS.__main__:main
- edge-playback = edgePlayback.__init__:main
+ edge-tts = edge_tts.__main__:main
+ edge-playback = edge_playback.__init__:main
diff --git a/src/edgePlayback/__main__.py b/src/edgePlayback/__main__.py
deleted file mode 100755
index 8365b7f..0000000
--- a/src/edgePlayback/__main__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env python3
-
-from edgePlayback.__init__ import *
-
-if __name__ == "__main__":
- main()
diff --git a/src/edgePlayback/__init__.py b/src/edge_playback/__init__.py
old mode 100755
new mode 100644
similarity index 63%
rename from src/edgePlayback/__init__.py
rename to src/edge_playback/__init__.py
index f31731f..45c8716
--- a/src/edgePlayback/__init__.py
+++ b/src/edge_playback/__init__.py
@@ -1,5 +1,9 @@
#!/usr/bin/env python3
+"""
+Play back TTS with subtitles using edge-tts and mpv.
+"""
+
import subprocess
import sys
import tempfile
@@ -7,13 +11,16 @@ from shutil import which
def main():
+ """
+ Main function.
+ """
if which("mpv") and which("edge-tts"):
with tempfile.NamedTemporaryFile() as media:
with tempfile.NamedTemporaryFile() as subtitle:
print()
- print("Media file %s" % media.name)
- print("Subtitle file %s\n" % subtitle.name)
- p = subprocess.Popen(
+ print(f"Media file {media.name}")
+ print(f"Subtitle file {subtitle.name}\n")
+ with subprocess.Popen(
[
"edge-tts",
"-w",
@@ -23,17 +30,18 @@ def main():
subtitle.name,
]
+ sys.argv[1:]
- )
- p.communicate()
- p = subprocess.Popen(
+ ) as process:
+ process.communicate()
+
+ with subprocess.Popen(
[
"mpv",
"--keep-open=yes",
- "--sub-file=" + subtitle.name,
+ f"--sub-file={subtitle.name}",
media.name,
]
- )
- p.communicate()
+ ) as process:
+ process.communicate()
else:
print("This script requires mpv and edge-tts.")
diff --git a/src/edge_playback/__main__.py b/src/edge_playback/__main__.py
new file mode 100644
index 0000000..2ac8c12
--- /dev/null
+++ b/src/edge_playback/__main__.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python3
+
+"""
+Entry point for `python -m edge_playback`.
+"""
+
+from edge_playback import main
+
+if __name__ == "__main__":
+    main()
diff --git a/src/edgeTTS/__init__.py b/src/edge_tts/__init__.py
similarity index 77%
rename from src/edgeTTS/__init__.py
rename to src/edge_tts/__init__.py
index 927b13c..3880896 100644
--- a/src/edgeTTS/__init__.py
+++ b/src/edge_tts/__init__.py
@@ -1,3 +1,7 @@
+"""
+__init__ for edge_tts
+"""
+
from .communicate import Communicate
from .list_voices import list_voices
from .submaker import SubMaker
diff --git a/src/edgeTTS/__main__.py b/src/edge_tts/__main__.py
similarity index 65%
rename from src/edgeTTS/__main__.py
rename to src/edge_tts/__main__.py
index e5e2737..b36f114 100644
--- a/src/edgeTTS/__main__.py
+++ b/src/edge_tts/__main__.py
@@ -1,3 +1,7 @@
+"""
+__main__ for edge_tts.
+"""
+
from .util import main
if __name__ == "__main__":
diff --git a/src/edgeTTS/communicate.py b/src/edge_tts/communicate.py
similarity index 89%
rename from src/edgeTTS/communicate.py
rename to src/edge_tts/communicate.py
index 1021553..6f9f7fb 100644
--- a/src/edgeTTS/communicate.py
+++ b/src/edge_tts/communicate.py
@@ -53,17 +53,14 @@ def remove_incompatible_characters(string):
if isinstance(string, bytes):
string = string.decode("utf-8")
- cleaned_string = ""
- for character in string:
- character_code = ord(character)
- if (
- (0 <= character_code <= 8)
- or (11 <= character_code <= 12)
- or (14 <= character_code <= 31)
- ):
- character = " "
- cleaned_string += character
- return cleaned_string
+ string = list(string)
+
+ for idx in range(len(string)): # pylint: disable=consider-using-enumerate
+ code = ord(string[idx])
+ if (0 <= code <= 8) or (11 <= code <= 12) or (14 <= code <= 31):
+ string[idx] = " "
+
+ return "".join(string)
def connect_id():
@@ -144,7 +141,8 @@ def mkssml(text, voice, pitch, rate, volume):
ssml = (
""
- f"{text}"
+ f""
+ f"{text}"
)
return ssml
@@ -192,7 +190,7 @@ def ssml_headers_plus_data(request_id, timestamp, ssml):
)
-class Communicate:
+class Communicate: # pylint: disable=too-few-public-methods
"""
Class for communicating with the service.
"""
@@ -214,7 +212,7 @@ class Communicate:
rate="+0%",
volume="+0%",
customspeak=False,
- ):
+ ): # pylint: disable=too-many-arguments, too-many-locals
"""
Runs the Communicate class.
@@ -266,7 +264,8 @@ class Communicate:
"Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-US,en;q=0.9",
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+ " (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
},
) as websocket:
for message in messages:
@@ -275,18 +274,22 @@ class Communicate:
# Prepare the request to be sent to the service.
#
- # Note that sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed
- # to be booleans, but Edge Browser seems to send them as strings and not booleans.
- # This is a bug in Edge Browser as Azure Cognitive Services actually sends them as
- # booleans and not strings. For now I will send them as booleans unless it causes
+ # Note sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed
+ # to be booleans, but Edge Browser seems to send them as strings.
+ #
+ # This is a bug in Edge as Azure Cognitive Services actually sends them as
+ # bool and not string. For now I will send them as bool unless it causes
# any problems.
#
- # Also pay close attention to double { } in request (escape for Python .format()).
+ # Also pay close attention to double { } in request (escape for f-string).
request = (
f"X-Timestamp:{self.date}\r\n"
"Content-Type:application/json; charset=utf-8\r\n"
"Path:speech.config\r\n\r\n"
- f'{{"context":{{"synthesis":{{"audio":{{"metadataoptions":{{"sentenceBoundaryEnabled":{sentence_boundary},"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"}}}}}}}}\r\n'
+ '{"context":{"synthesis":{"audio":{"metadataoptions":{'
+ f'"sentenceBoundaryEnabled":{sentence_boundary},'
+ f'"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"'
+ "}}}}\r\n"
)
# Send the request to the service.
await websocket.send_str(request)
diff --git a/src/edgeTTS/constants.py b/src/edge_tts/constants.py
similarity index 100%
rename from src/edgeTTS/constants.py
rename to src/edge_tts/constants.py
diff --git a/src/edgeTTS/list_voices.py b/src/edge_tts/list_voices.py
similarity index 83%
rename from src/edgeTTS/list_voices.py
rename to src/edge_tts/list_voices.py
index 0fe6091..8d42370 100644
--- a/src/edgeTTS/list_voices.py
+++ b/src/edge_tts/list_voices.py
@@ -15,9 +15,7 @@ async def list_voices():
This pulls data from the URL used by Microsoft Edge to return a list of
all available voices. However many more experimental voices are available
- than are listed here.
- (See
- https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)
+ than are listed here. (See https://aka.ms/csspeech/voicenames)
Returns:
dict: A dictionary of voice attributes.
@@ -29,7 +27,8 @@ async def list_voices():
"Authority": "speech.platform.bing.com",
"Sec-CH-UA": '" Not;A Brand";v="99", "Microsoft Edge";v="91", "Chromium";v="91"',
"Sec-CH-UA-Mobile": "?0",
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+ "(KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
"Accept": "*/*",
"Sec-Fetch-Site": "none",
"Sec-Fetch-Mode": "cors",
diff --git a/src/edgeTTS/submaker.py b/src/edge_tts/submaker.py
similarity index 55%
rename from src/edgeTTS/submaker.py
rename to src/edge_tts/submaker.py
index 44d7e92..fe80b61 100644
--- a/src/edgeTTS/submaker.py
+++ b/src/edge_tts/submaker.py
@@ -1,29 +1,68 @@
+"""
+SubMaker module for the Edge TTS project.
+
+SubMaker makes it easier to create subtitles from the information
+provided by the service.
+"""
+
import math
from xml.sax.saxutils import escape
def formatter(offset1, offset2, subdata):
- data = (
+ """
+ formatter returns the timecode and the text of the subtitle.
+ """
+ return (
f"{mktimestamp(offset1)} --> {mktimestamp(offset2)}\r\n"
f"{escape(subdata)}\r\n\r\n"
)
- return data
def mktimestamp(time_unit):
- hour = math.floor(time_unit / 10000 / 1000 / 3600)
- minute = math.floor((time_unit / 10000 / 1000 / 60) % 60)
- seconds = (time_unit / 10000 / 1000) % 60
+ """
+ mktimestamp returns the timecode of the subtitle.
+
+ The timecode is in the format of 00:00:00.000.
+
+ Returns:
+ str: The timecode of the subtitle.
+ """
+ hour = math.floor(time_unit / 10 ** 7 / 3600)
+ minute = math.floor((time_unit / 10 ** 7 / 60) % 60)
+ seconds = (time_unit / 10 ** 7) % 60
return f"{hour:02d}:{minute:02d}:{seconds:06.3f}"
class SubMaker:
+ """
+ SubMaker class
+ """
+
def __init__(self, overlapping=5):
+ """
+ SubMaker constructor.
+
+ Args:
+ overlapping (int): The amount of time in seconds that the
+ subtitles should overlap.
+ """
self.subs_and_offset = []
self.broken_offset = []
self.overlapping = overlapping * (10 ** 7)
def create_sub(self, timestamp, text):
+ """
+ create_sub creates a subtitle with the given timestamp and text
+ and adds it to the list of subtitles
+
+ Args:
+ timestamp (int): The timestamp of the subtitle.
+ text (str): The text of the subtitle.
+
+ Returns:
+ None
+ """
if len(self.subs_and_offset) >= 2:
if self.subs_and_offset[-2] >= timestamp + sum(self.broken_offset):
self.broken_offset.append(self.subs_and_offset[-2])
@@ -33,6 +72,12 @@ class SubMaker:
self.subs_and_offset.append(text)
def generate_subs(self):
+ """
+ generate_subs generates the complete subtitle file.
+
+ Returns:
+ str: The complete subtitle file.
+ """
if len(self.subs_and_offset) >= 2:
data = "WEBVTT\r\n\r\n"
old_time_stamp = None
diff --git a/src/edgeTTS/util.py b/src/edge_tts/util.py
similarity index 60%
rename from src/edgeTTS/util.py
rename to src/edge_tts/util.py
index 68c7ff3..8d64863 100644
--- a/src/edgeTTS/util.py
+++ b/src/edge_tts/util.py
@@ -7,7 +7,54 @@ import argparse
import asyncio
import sys
-from edgeTTS import Communicate, SubMaker, list_voices
+from edge_tts import Communicate, SubMaker, list_voices
+
+
+async def _list_voices():
+    """
+    Print the attributes of every voice returned by list_voices(), with a
+    blank line between consecutive voices.
+    """
+    hidden = ("SuggestedCodec", "FriendlyName", "Status")
+    for idx, voice in enumerate(await list_voices()):
+        if idx != 0:
+            print()
+
+        for key, value in voice.items():
+            if key not in hidden:
+                print(f"{key}: {value}")
+
+
+async def _tts(args):
+    """
+    Run TTS for args.text: audio goes to args.write_media (stdout if unset)
+    and subtitles go to args.write_subtitles (stderr if unset).
+    """
+    tts = Communicate()
+    subs = SubMaker(args.overlapping)
+    media_file = None
+    if args.write_media:
+        media_file = open(args.write_media, "wb")  # pylint: disable=consider-using-with
+    audio_out = media_file if media_file is not None else sys.stdout.buffer
+    try:
+        async for i in tts.run(
+            args.text, args.enable_sentence_boundary, args.enable_word_boundary,
+            args.codec, args.voice, args.pitch, args.rate, args.volume,
+            customspeak=args.custom_ssml,
+        ):
+            if i[2] is not None:
+                audio_out.write(i[2])
+            elif i[0] is not None and i[1] is not None:
+                subs.create_sub(i[0], i[1])
+    finally:
+        # Close the media file even if synthesis fails part-way through.
+        if media_file is not None:
+            media_file.close()
+    if not args.write_subtitles:
+        sys.stderr.write(subs.generate_subs())
+    else:
+        with open(args.write_subtitles, "w", encoding="utf-8") as file:
+            file.write(subs.generate_subs())
async def _main():
@@ -24,19 +71,23 @@ async def _main():
parser.add_argument(
"-v",
"--voice",
- help="voice for TTS. Default: Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
+ help="voice for TTS. "
+ "Default: Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
default="Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
)
parser.add_argument(
"-c",
"--codec",
- help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. Another choice is webm-24khz-16bit-mono-opus. For more info check https://bit.ly/2T33h6S",
+ help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. "
+ "Another choice is webm-24khz-16bit-mono-opus. "
+ "For more info check https://bit.ly/2T33h6S",
default="audio-24khz-48kbitrate-mono-mp3",
)
group.add_argument(
"-l",
"--list-voices",
- help="lists available voices. Edge's list is incomplete so check https://bit.ly/2SFq1d3",
+ help="lists available voices. "
+ "Edge's list is incomplete so check https://bit.ly/2SFq1d3",
action="store_true",
)
parser.add_argument(
@@ -85,6 +136,10 @@ async def _main():
)
args = parser.parse_args()
+ if args.list_voices:
+ await _list_voices()
+ sys.exit(0)
+
if args.text is not None or args.file is not None:
if args.file is not None:
# we need to use sys.stdin.read() because some devices
@@ -96,45 +151,8 @@ async def _main():
# logger.debug("reading from %s" % args.file)
with open(args.file, "r", encoding="utf-8") as file:
args.text = file.read()
- tts = Communicate()
- subs = SubMaker(args.overlapping)
- if args.write_media:
- media_file = open(args.write_media, "wb")
- async for i in tts.run(
- args.text,
- args.enable_sentence_boundary,
- args.enable_word_boundary,
- args.codec,
- args.voice,
- args.pitch,
- args.rate,
- args.volume,
- customspeak=args.custom_ssml,
- ):
- if i[2] is not None:
- if not args.write_media:
- sys.stdout.buffer.write(i[2])
- else:
- media_file.write(i[2])
- elif i[0] is not None and i[1] is not None:
- subs.create_sub(i[0], i[1])
- if args.write_media:
- media_file.close()
- if not args.write_subtitles:
- sys.stderr.write(subs.generate_subs())
- else:
- with open(args.write_subtitles, "w", encoding="utf-8") as file:
- file.write(subs.generate_subs())
- elif args.list_voices:
- for idx, voice in enumerate(await list_voices()):
- if idx != 0:
- print()
- for key in voice.keys():
- if key in ["SuggestedCodec", "FriendlyName", "Status"]:
- continue
- # print ("%s: %s" % ("Name" if key == "ShortName" else key, voice[key]))
- print(f"{key}: {voice[key]}")
+ await _tts(args)
def main():