diff --git a/examples/input_example.py b/examples/input_example.py index 6ec43b3..f94d867 100755 --- a/examples/input_example.py +++ b/examples/input_example.py @@ -6,13 +6,13 @@ import edgeTTS from playsound import playsound async def main(): - communicate = edgeTTS.Communicate() - ask = input("What do you want TTS to say? ") - with tempfile.NamedTemporaryFile() as fp: - async for i in communicate.run(ask): - if i[2] is not None: - fp.write(i[2]) - playsound(fp.name) + communicate = edgeTTS.Communicate() + ask = input("What do you want TTS to say? ") + with tempfile.NamedTemporaryFile() as fp: + async for i in communicate.run(ask): + if i[2] is not None: + fp.write(i[2]) + playsound(fp.name) if __name__ == "__main__": - asyncio.run(main()) + asyncio.run(main()) diff --git a/src/edgePlayback/__init__.py b/src/edgePlayback/__init__.py index 9973f04..501c121 100755 --- a/src/edgePlayback/__init__.py +++ b/src/edgePlayback/__init__.py @@ -8,8 +8,9 @@ def main(): if which('mpv') and which('edge-tts'): with tempfile.NamedTemporaryFile() as media: with tempfile.NamedTemporaryFile() as subtitle: - print ("Media file: %s" % media.name) - print ("Subtitle file: %s" % subtitle.name) + print () + print ("Media file %s" % media.name) + print ("Subtitle file %s\n" % subtitle.name) p = subprocess.Popen(['edge-tts', '-w'] + sys.argv[1:], stdout=media, stderr=subtitle) p.communicate() p = subprocess.Popen(['mpv', '--keep-open=yes', '--sub-file=' + subtitle.name, media.name]) diff --git a/src/edgeTTS/__init__.py b/src/edgeTTS/__init__.py index 31c66d7..7be4126 100755 --- a/src/edgeTTS/__init__.py +++ b/src/edgeTTS/__init__.py @@ -23,12 +23,13 @@ voiceList = 'https://speech.platform.bing.com/consumer/speech/synthesize/readalo def formatdate(): return time.strftime('%a %b %d %Y %H:%M:%S GMT+0000 (Coordinated Universal Time)', time.gmtime()) -def bool_to_lower_str(x): - return 'true' if x else 'false' - +# The connectID Edge sends to the service (just UUID without dashes) def connectId(): return str(uuid.uuid4()).replace("-", "") +# The service doesn't support a couple character ranges. Most bothering being +# \v because it is present in OCR-ed PDFs. Not doing this causes the whole +# connection with websockets server to crash. def removeIncompatibleControlChars(s): logger = logging.getLogger("edgeTTS.removeIncompatibleControlChars") output = "" @@ -44,6 +45,7 @@ def removeIncompatibleControlChars(s): logger.debug("Generated %s" % output.encode('utf-8')) return output +# Make WEBVTT formated timestamp based on TTS service's Offset value def mktimestamp(ns): hour = math.floor(ns / 10000 / 1000 / 3600) minute = math.floor((ns / 10000 / 1000 / 60) % 60) @@ -51,6 +53,9 @@ def mktimestamp(ns): mili = float(str(math.modf((ns / 10000) - (1000 * seconds))[1])[:3]) return "%.02d:%.02d:%.02d.%.03d" % (hour, minute, seconds, mili) +# Return loaded JSON data of list of Edge's voices +# NOTE: It's not the total list of available voices. +# This is only what is presented in the UI. def list_voices(): logger = logging.getLogger("edgeTTS.list_voices") with httpx.Client(http2=True, headers={ @@ -94,11 +99,11 @@ class SubMaker: except IndexError: # This means TTS said one word only. data += self.formatter(0, first + ((10**7) * 10), self.subsAndOffset[first]) for sub in sorted(self.subsAndOffset.keys(), key=int)[1:]: - if (oldTimeStamp and oldSubData) is not None: + if oldTimeStamp is not None and oldSubData is not None: data += self.formatter(oldTimeStamp, sub + self.overlapping, oldSubData) ## overlapping Subtitles oldTimeStamp = sub oldSubData = self.subsAndOffset[sub] - if (oldTimeStamp and oldSubData) is not None: + if oldTimeStamp is not None and oldSubData is not None: data += self.formatter(oldTimeStamp, oldTimeStamp + ((10**7) * 10), oldSubData) return data @@ -117,8 +122,8 @@ class Communicate: return message async def run(self, msg, sentenceBoundary=False, wordBoundary=False, codec="audio-24khz-48kbitrate-mono-mp3", voice="Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)", pitch="+0Hz", rate="+0%", volume="+0%", customspeak=False): - sentenceBoundary = bool_to_lower_str(sentenceBoundary) - wordBoundary = bool_to_lower_str(wordBoundary) + sentenceBoundary = str(sentenceBoundary).lower() + wordBoundary = str(wordBoundary).lower() if not customspeak: wsmax = 2 ** 16 @@ -215,11 +220,11 @@ async def _main(): parser.add_argument('-V', '--volume', help="set TTS volume. Default +0%%. For more info check https://bit.ly/3eAE5Nx", default="+0%") parser.add_argument('-s', '--enable-sentence-boundary', help="enable sentence boundary (not implemented but settable)", action='store_true') parser.add_argument('-w', '--enable-word-boundary', help="enable word boundary (not implemented but settable)", action='store_true') - parser.add_argument('-O', '--overlapping', help="overlapping subtitles in seconds", default=5) + parser.add_argument('-O', '--overlapping', help="overlapping subtitles in seconds", default=5, type=float) args = parser.parse_args() logging.basicConfig(level=args.log_level) logger = logging.getLogger("edgeTTS._main") - if (args.text or args.file) is not None: + if args.text is not None or args.file is not None: if args.file is not None: # we need to use sys.stdin.read() because some devices # like Windows and Termux don't have a /dev/stdin. @@ -235,7 +240,7 @@ async def _main(): async for i in tts.run(args.text, args.enable_sentence_boundary, args.enable_word_boundary, args.codec, args.voice, args.pitch, args.rate, args.volume, customspeak=args.custom_ssml): if i[2] is not None: sys.stdout.buffer.write(i[2]) - elif (i[0] and i[1]) is not None: + elif i[0] is not None and i[1] is not None: subs.createSub(i[0], i[1]) if not subs.subsAndOffset == {}: sys.stderr.write(subs.generateSubs())