Fix some bugs

This commit is contained in:
rany 2021-06-09 14:16:51 +03:00
parent 5c1f8b8355
commit 7ced20885c
3 changed files with 26 additions and 20 deletions

View File

@ -6,13 +6,13 @@ import edgeTTS
from playsound import playsound
async def main():
communicate = edgeTTS.Communicate()
ask = input("What do you want TTS to say? ")
with tempfile.NamedTemporaryFile() as fp:
async for i in communicate.run(ask):
if i[2] is not None:
fp.write(i[2])
playsound(fp.name)
communicate = edgeTTS.Communicate()
ask = input("What do you want TTS to say? ")
with tempfile.NamedTemporaryFile() as fp:
async for i in communicate.run(ask):
if i[2] is not None:
fp.write(i[2])
playsound(fp.name)
if __name__ == "__main__":
asyncio.run(main())
asyncio.run(main())

View File

@ -8,8 +8,9 @@ def main():
if which('mpv') and which('edge-tts'):
with tempfile.NamedTemporaryFile() as media:
with tempfile.NamedTemporaryFile() as subtitle:
print ("Media file: %s" % media.name)
print ("Subtitle file: %s" % subtitle.name)
print ()
print ("Media file %s" % media.name)
print ("Subtitle file %s\n" % subtitle.name)
p = subprocess.Popen(['edge-tts', '-w'] + sys.argv[1:], stdout=media, stderr=subtitle)
p.communicate()
p = subprocess.Popen(['mpv', '--keep-open=yes', '--sub-file=' + subtitle.name, media.name])

View File

@ -23,12 +23,13 @@ voiceList = 'https://speech.platform.bing.com/consumer/speech/synthesize/readalo
def formatdate():
return time.strftime('%a %b %d %Y %H:%M:%S GMT+0000 (Coordinated Universal Time)', time.gmtime())
def bool_to_lower_str(x):
return 'true' if x else 'false'
# The connectID Edge sends to the service (just UUID without dashes)
def connectId():
return str(uuid.uuid4()).replace("-", "")
# The service doesn't support a couple character ranges. Most bothering being
# \v because it is present in OCR-ed PDFs. Not doing this causes the whole
# connection with websockets server to crash.
def removeIncompatibleControlChars(s):
logger = logging.getLogger("edgeTTS.removeIncompatibleControlChars")
output = ""
@ -44,6 +45,7 @@ def removeIncompatibleControlChars(s):
logger.debug("Generated %s" % output.encode('utf-8'))
return output
# Make WEBVTT formated timestamp based on TTS service's Offset value
def mktimestamp(ns):
hour = math.floor(ns / 10000 / 1000 / 3600)
minute = math.floor((ns / 10000 / 1000 / 60) % 60)
@ -51,6 +53,9 @@ def mktimestamp(ns):
mili = float(str(math.modf((ns / 10000) - (1000 * seconds))[1])[:3])
return "%.02d:%.02d:%.02d.%.03d" % (hour, minute, seconds, mili)
# Return loaded JSON data of list of Edge's voices
# NOTE: It's not the total list of available voices.
# This is only what is presented in the UI.
def list_voices():
logger = logging.getLogger("edgeTTS.list_voices")
with httpx.Client(http2=True, headers={
@ -94,11 +99,11 @@ class SubMaker:
except IndexError: # This means TTS said one word only.
data += self.formatter(0, first + ((10**7) * 10), self.subsAndOffset[first])
for sub in sorted(self.subsAndOffset.keys(), key=int)[1:]:
if (oldTimeStamp and oldSubData) is not None:
if oldTimeStamp is not None and oldSubData is not None:
data += self.formatter(oldTimeStamp, sub + self.overlapping, oldSubData) ## overlapping Subtitles
oldTimeStamp = sub
oldSubData = self.subsAndOffset[sub]
if (oldTimeStamp and oldSubData) is not None:
if oldTimeStamp is not None and oldSubData is not None:
data += self.formatter(oldTimeStamp, oldTimeStamp + ((10**7) * 10), oldSubData)
return data
@ -117,8 +122,8 @@ class Communicate:
return message
async def run(self, msg, sentenceBoundary=False, wordBoundary=False, codec="audio-24khz-48kbitrate-mono-mp3", voice="Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)", pitch="+0Hz", rate="+0%", volume="+0%", customspeak=False):
sentenceBoundary = bool_to_lower_str(sentenceBoundary)
wordBoundary = bool_to_lower_str(wordBoundary)
sentenceBoundary = str(sentenceBoundary).lower()
wordBoundary = str(wordBoundary).lower()
if not customspeak:
wsmax = 2 ** 16
@ -215,11 +220,11 @@ async def _main():
parser.add_argument('-V', '--volume', help="set TTS volume. Default +0%%. For more info check https://bit.ly/3eAE5Nx", default="+0%")
parser.add_argument('-s', '--enable-sentence-boundary', help="enable sentence boundary (not implemented but settable)", action='store_true')
parser.add_argument('-w', '--enable-word-boundary', help="enable word boundary (not implemented but settable)", action='store_true')
parser.add_argument('-O', '--overlapping', help="overlapping subtitles in seconds", default=5)
parser.add_argument('-O', '--overlapping', help="overlapping subtitles in seconds", default=5, type=float)
args = parser.parse_args()
logging.basicConfig(level=args.log_level)
logger = logging.getLogger("edgeTTS._main")
if (args.text or args.file) is not None:
if args.text is not None or args.file is not None:
if args.file is not None:
# we need to use sys.stdin.read() because some devices
# like Windows and Termux don't have a /dev/stdin.
@ -235,7 +240,7 @@ async def _main():
async for i in tts.run(args.text, args.enable_sentence_boundary, args.enable_word_boundary, args.codec, args.voice, args.pitch, args.rate, args.volume, customspeak=args.custom_ssml):
if i[2] is not None:
sys.stdout.buffer.write(i[2])
elif (i[0] and i[1]) is not None:
elif i[0] is not None and i[1] is not None:
subs.createSub(i[0], i[1])
if not subs.subsAndOffset == {}:
sys.stderr.write(subs.generateSubs())