Split long TTS messages
parent
c7d81b26df
commit
defca3cb25
|
@ -18,6 +18,28 @@ from telethon import events, helpers, types
|
||||||
mimetypes.add_type('audio/mpeg', '.borg+tts')
|
mimetypes.add_type('audio/mpeg', '.borg+tts')
|
||||||
|
|
||||||
|
|
||||||
|
def split_text(text, n=40):
    """
    Split ``text`` into chunks of at most ``n`` whitespace-separated words.

    Each chunk is cut at the most natural boundary found within its first
    ``n`` words, in this order of preference:

    1. the last word ending in a period (``.``),
    2. the last word ending in a semicolon (``;``),
    3. the last word ending in a comma (``,``),
    4. a hard cut after exactly ``n`` words.

    Words are re-joined with single spaces, so runs of whitespace in the
    input are collapsed. Text of ``n`` words or fewer is yielded as a single
    chunk; empty or whitespace-only text yields nothing.

    This is a generator: the ``ValueError`` for an invalid ``n`` is raised
    when iteration starts, not when the function is called.

    :param text: the text to split.
    :param n: maximum number of words per chunk; must be at least 1.
    :raises ValueError: if ``n`` is smaller than 1.
    """
    if n < 1:
        # With n <= 0 the cut position would never advance through the word
        # list and the loop below would yield forever.
        raise ValueError('n must be a positive integer')

    words = text.split()
    while len(words) > n:
        semicolon = None
        comma = None

        # Walk the first n words backwards so the *last* boundary of each
        # kind inside the window is the one that gets remembered.
        for i in reversed(range(n)):
            word = words[i]
            if word.endswith('.'):
                # A sentence end beats every other boundary; stop looking.
                cut = i + 1
                break
            if semicolon is None and word.endswith(';'):
                semicolon = i + 1
            elif comma is None and word.endswith(','):
                comma = i + 1
        else:
            # No sentence end in the window: fall back to the weaker
            # boundaries, or to a hard cut after n words. The stored
            # positions are always >= 1, so ``or`` chaining is safe here.
            cut = semicolon or comma or n

        yield ' '.join(words[:cut])
        words = words[cut:]

    if words:
        yield ' '.join(words)
|
||||||
|
|
||||||
|
|
||||||
class Translator:
|
class Translator:
|
||||||
_TKK_RE = re.compile(r"tkk:'(\d+)\.(\d+)'", re.DOTALL)
|
_TKK_RE = re.compile(r"tkk:'(\d+)\.(\d+)'", re.DOTALL)
|
||||||
_BASE_URL = 'https://translate.google.com'
|
_BASE_URL = 'https://translate.google.com'
|
||||||
|
@ -143,23 +165,28 @@ class Translator:
|
||||||
async with self._tkk_lock:
|
async with self._tkk_lock:
|
||||||
self._tkk = await self._fetch_tkk()
|
self._tkk = await self._fetch_tkk()
|
||||||
|
|
||||||
params = [
|
parts = list(split_text(text))
|
||||||
('ie', 'UTF-8'),
|
result = b''
|
||||||
('q', text),
|
for i, part in enumerate(parts):
|
||||||
('tl', target or self._target),
|
params = [
|
||||||
('total', 1),
|
('ie', 'UTF-8'),
|
||||||
('idx', 0),
|
('q', part),
|
||||||
('textlen', len(helpers.add_surrogate(text))),
|
('tl', target or self._target),
|
||||||
('tk', self._calc_token(text)),
|
('total', len(parts)),
|
||||||
('client', 'webapp'),
|
('idx', i),
|
||||||
('prev', 'input'),
|
('textlen', len(helpers.add_surrogate(part))),
|
||||||
]
|
('tk', self._calc_token(part)),
|
||||||
|
('client', 'webapp'),
|
||||||
|
('prev', 'input'),
|
||||||
|
]
|
||||||
|
|
||||||
async with self._session.get(self._TRANSLATE_TTS_URL, params=params) as resp:
|
async with self._session.get(self._TRANSLATE_TTS_URL, params=params) as resp:
|
||||||
if resp.status == 404:
|
if resp.status == 404:
|
||||||
raise ValueError('unknown target language')
|
raise ValueError('unknown target language')
|
||||||
else:
|
else:
|
||||||
return await resp.read()
|
result += await resp.read()
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
await self._session.close()
|
await self._session.close()
|
||||||
|
|
Reference in New Issue