Commit c20afed92e679c79f5622be057c66906df514035
1 parent
ee505ab7
Added sample bot server with required python libraries list
Showing
2 changed files
with
409 additions
and
0 deletions
requirements.txt
0 → 100644
1 | +APScheduler==3.6.3 | |
2 | +certifi==2020.12.5 | |
3 | +cffi==1.14.4 | |
4 | +chardet==4.0.0 | |
5 | +cryptography==3.3.1 | |
6 | +decorator==4.4.2 | |
7 | +future==0.18.2 | |
8 | +idna==2.10 | |
9 | +pycparser==2.20 | |
10 | +python-telegram-bot==11.0.0 | |
11 | +pytz==2020.5 | |
12 | +requests==2.25.1 | |
13 | +six==1.15.0 | |
14 | +SoundFile==0.10.3.post1 | |
15 | +tornado==6.1 | |
16 | +tzlocal==2.1 | |
17 | +urllib3==1.26.3 | ... | ... |
stt_bot_server.py
0 → 100644
1 | +#!/usr/bin/env python | |
2 | +# -*- coding: utf-8 -*- | |
3 | +# | |
4 | +# SDAML, NSU, 2017, 2021 | |
5 | +# Evgeniy Pavlovskiy | |
6 | +""" | |
7 | +This Bot uses the Updater class to handle the bot. | |
8 | + | |
9 | +Then, the bot is started and runs until we press Ctrl-C on the command line. | |
10 | + | |
11 | +Usage: | |
12 | +... | |
13 | +Press Ctrl-C on the command line or send a signal to the process to stop the | |
14 | +bot. | |
15 | +""" | |
16 | + | |
17 | +from telegram import (ReplyKeyboardMarkup, ReplyKeyboardRemove, MessageEntity, LabeledPrice) | |
18 | +from telegram.ext import (Updater, CommandHandler, MessageHandler, Filters, | |
19 | + ConversationHandler, RegexHandler) | |
20 | +import soundfile as sf | |
21 | +import logging | |
22 | + | |
23 | +from html import escape | |
24 | +import sys | |
25 | +import codecs | |
26 | +import os | |
27 | + | |
# Write every record at INFO level or above to a log file in the working
# directory, tagged with time, logger name, calling function and level.
logging.basicConfig(
    filename=u'stt_bot.log',
    level=logging.INFO,
    format='%(asctime)s - %(name)s (%(funcName)s)- %(levelname)s - %(message)s',
)

# Logger named after this module.
logger = logging.getLogger(__name__)
34 | + | |
35 | +from sdk_voice import * | |
36 | +from stt_voice import * | |
37 | + | |
# Key handed to the speech client below.
subscription_key = '1'

# Client for the speech-to-text server; host and port are hard-coded.
stt_client = Speech_Client(
    url="http://172.16.71.234",
    port=7999,
    subscription_key=subscription_key,
)

# Conversation state identifiers: ten consecutive integers starting at 0.
(
    NAME, BIRTHDATE, TAGS, ADD_PROFILE, ENROLL_PROFILE,
    ENROLL_VOICE, IDENTIFY_VOICE, KEY_ENTRY, DELETE_PROFILE,
    RECOGNIZE_SPEECH,
) = range(10)

# Telegram user ids looked up by check_users().
allowed_users = [
    255603267,  # euxsun
]
47 | + | |
def check_users(update):
    """Log who sent *update* and report whether handling must stop.

    Handlers call this as ``if check_users(update): return``, so a truthy
    result means "do not process this update".  The function returns
    ``False`` in every case: users missing from ``allowed_users`` are only
    logged, never rejected.

    Args:
        update: incoming update; ``update.message.from_user.id`` is read.

    Returns:
        ``False`` always.
    """
    user_id = update.message.from_user.id

    logger = logging.getLogger(__name__)
    logger.info('User %s trying to use this bot.', user_id)

    if user_id not in allowed_users:
        logger.info(
            'User %s not allowed to use this bot. Write to @EuXsun to allow.',
            user_id)
    # NOTE(review): the reply to the user is switched off and nothing is
    # blocked, so the allow-list is advisory only -- confirm this is intended.
    return False
61 | + | |
def readable_profile_info(info):
    """Format a profile record as one human-readable line.

    The line has the shape
    ``/<id>: <name>(<gender>), <birth_date> (<tags>).``.  When the record
    cannot be formatted that way (a key is missing, a field is not a
    string, ...), ``str(info)`` is returned instead.
    """
    try:
        pieces = [
            '/', str(info['id']), ': ', info['name'],
            '(', info['gender'], '), ', info['birth_date'],
            ' (', info['tags'], ').',
        ]
        # str.join rejects non-string items, just like ``+`` concatenation.
        return ''.join(pieces)
    except Exception:
        return str(info)
69 | + | |
def start(bot, update):
    """Handle /start: run the user check and log the command.

    Nothing is sent back to the user.
    """
    blocked = check_users(update)
    if blocked:
        return

    logging.getLogger(__name__).info("/start command sent.")
    # NOTE: the profile listing that used to follow (spk_client.get_profiles()
    # plus one get_profile_info() reply per profile) is disabled.
90 | + | |
91 | + | |
92 | + | |
def help(bot, update):
    """Reply to /help with the usage text, formatted as Telegram HTML.

    The name shadows the builtin ``help``; it is kept because the handler
    is registered under this name.

    Args:
        bot: unused.
        update: incoming update; the reply goes to ``update.message``.
    """
    # Each paragraph ends with a blank line so the adjacent literals do not
    # run together in the message.
    text = (
        'Добро пожаловать в @SpeechToTextNsuAi_bot!\n\n'
        'Отправьте голосовое сообщение или аудиофайл и бот превратит его в текст, расставив знаки препинания и выделив следующие сущности: <i>человек</i>, <b>местоположение</b>, <u>организация</u>.\n\n'
        'Обрабатываются записи не более 5 минут.\n\n'
        '/help - настоящая справка.\n\n'
        '/cancel - отмена выбранного действия.\n\n'
        'Это сибирская ❄️ разработка лаборатории аналитики потоковых данных и машинного обучения НГУ (https://bigdata.nsu.ru). \n\n'
    )
    update.message.reply_text(text, parse_mode='HTML')
103 | + | |
104 | + | |
def cancel(bot, update):
    """Handle /cancel: confirm the cancellation and end the conversation.

    Returns:
        ``ConversationHandler.END``, or ``None`` when ``check_users``
        asks to stop.
    """
    if check_users(update):
        return

    first_name = update.message.from_user.first_name
    logger.info("User %s canceled the process.", first_name)

    # Drop any custom keyboard together with the confirmation.
    update.message.reply_text(
        'Отменяю последнюю операцию.',
        reply_markup=ReplyKeyboardRemove(),
    )
    return ConversationHandler.END
114 | + | |
def error(bot, update, error):
    """Log an error raised while processing *update* and notify the user.

    Args:
        bot: unused.
        update: the update being processed.  It may be ``None`` or carry no
            message (e.g. errors that are not tied to a chat message); in
            that case the error is only logged.
        error: the exception that was raised.
    """
    logger = logging.getLogger(__name__)
    logger.error('Update "%s" caused error "%s"', update, error)

    # Only reply when there is a message to reply to; otherwise the error
    # handler itself would fail with AttributeError.
    message = getattr(update, 'message', None)
    if message is not None:
        message.reply_text('Error: {}.'.format(str(error)))
118 | + | |
119 | + | |
def recognize(but, update):
    """Handle /recognize: ask for a recording and wait for it.

    Returns:
        ``RECOGNIZE_SPEECH``, or ``None`` when ``check_users`` asks to stop.
    """
    if check_users(update):
        return

    logging.getLogger(__name__).info('User selected /recognize command.')
    update.message.reply_text('Запишите не более 5 минут речи.')
    return RECOGNIZE_SPEECH
130 | + | |
def _parse_entity(text, entity):
    """Return the part of *text* that *entity* covers.

    ``entity.offset`` and ``entity.length`` are applied directly as string
    indices; the UTF-16 based variant of this lookup is disabled.
    """
    begin = entity.offset
    end = begin + entity.length
    return text[begin:end]
139 | + | |
def parse_ent(message_text, entities, urled=False):
    """Render *message_text* as HTML, wrapping every entity in its tag.

    Text outside entities and the entity text itself are HTML-escaped.
    Entities are processed in order of their ``offset``.

    Args:
        message_text: plain text, or ``None``.
        entities: iterable of hashable entity objects exposing ``type``,
            ``offset`` and ``length`` (and ``url`` for text links).
        urled: when true, URL entities are turned into ``<a>`` links;
            otherwise they are emitted as plain escaped text.

    Returns:
        The HTML string, or ``None`` when *message_text* is ``None``.
    """
    if message_text is None:
        return None

    html_parts = []
    last_offset = 0
    # Keyed by entity, so entities that compare equal are rendered once.
    entities_dict = {
        entity: _parse_entity(message_text, entity)
        for entity in entities
    }

    for entity, text in sorted(entities_dict.items(),
                               key=lambda item: item[0].offset):
        text = escape(text)

        if entity.type == MessageEntity.TEXT_LINK:
            insert = '<a href="{}">{}</a>'.format(entity.url, text)
        elif entity.type == MessageEntity.URL and urled:
            insert = '<a href="{0}">{0}</a>'.format(text)
        elif entity.type == MessageEntity.BOLD:
            insert = '<b>' + text + '</b>'
        elif entity.type == MessageEntity.ITALIC:
            insert = '<i>' + text + '</i>'
        elif entity.type == 'underline':
            insert = '<u>' + text + '</u>'
        elif entity.type == MessageEntity.CODE:
            insert = '<code>' + text + '</code>'
        elif entity.type == MessageEntity.PRE:
            insert = '<pre>' + text + '</pre>'
        else:
            insert = text

        # Plain text between the previous entity and this one, then the
        # tagged entity itself.
        html_parts.append(escape(message_text[last_offset:entity.offset]))
        html_parts.append(insert)
        last_offset = entity.offset + entity.length

    # Whatever follows the last entity.
    html_parts.append(escape(message_text[last_offset:]))
    return ''.join(html_parts)
187 | + | |
def _result_to_html(result):
    """Convert a recognition result from the speech server into HTML text.

    When ``result`` has a ``'ner'`` key it is read as a list of sentences,
    each with a ``'text'`` value and optionally a ``'named_entities'``
    mapping from a label to ``(start, end)`` offset pairs.  LOCATION spans
    are rendered bold, PERSON italic and ORG underlined; every sentence is
    followed by a single space.  Otherwise, when ``result`` has a
    ``'stt.punct'`` key, the ``'sent'`` strings of its items are joined
    with spaces.

    Returns:
        The HTML text, or ``''`` when there is nothing to show.
    """
    logger = logging.getLogger(__name__)

    if 'ner' in result:
        # Label -> entity type, in the order the labels are collected.
        label_types = (
            ('LOCATION', MessageEntity.BOLD),
            ('PERSON', MessageEntity.ITALIC),
            ('ORG', 'underline'),
        )
        html_out = ''
        ent_cnt = 0
        for sentence in result['ner']:
            entities = []
            if 'named_entities' in sentence:
                found = sentence['named_entities']
                for label, entity_type in label_types:
                    for span in found.get(label, ()):
                        entities.append(
                            MessageEntity(type=entity_type, offset=span[0],
                                          length=span[1] - span[0])
                        )
            html_out += parse_ent(str(sentence['text']), entities) + ' '
            ent_cnt += len(entities)
        logger.info('Found {} entities'.format(ent_cnt))
        logger.info('Out html: {}'.format(html_out))
        return html_out

    if 'stt.punct' in result:
        return ' '.join(item['sent'] for item in result['stt.punct'])

    return ''


def recognize_speech(bot, update):
    """Recognize a Telegram voice message and reply with the text.

    The voice file is saved as ``tmp.ogg`` in the working directory and
    passed to the speech client.  The reply is the recognized text as HTML,
    a "nothing recognized" notice, or the server's error response.

    Args:
        bot: bot object used to fetch the voice file.
        update: incoming update.

    Returns:
        ``RECOGNIZE_SPEECH`` when the message has no voice recording,
        ``ConversationHandler.END`` after a recognition attempt, or
        ``None`` when ``check_users`` asks to stop.
    """
    if check_users(update):
        return
    logger = logging.getLogger(__name__)

    if not update.message.voice:
        logger.info('No voice in the message. Repeating.')
        update.message.reply_text('Это не запись голоса. Пожалуйста повторите, нажав кнопку микрофона (для записи голоса), или перешлите сюда запись голоса из чата. Можете также отменить операцию: /cancel.')
        return RECOGNIZE_SPEECH

    voice_file = bot.get_file(update.message.voice.file_id)
    voice_file.download('tmp.ogg')

    logger.info("Sending information to server.")
    res = stt_client.recognize_speech(filename='tmp', extension='ogg')

    if 'message' in res:
        # The presence of 'message' marks an error response, so log that key.
        logger.error('Error from server: %s', res['message'])
        update.message.reply_text("Сервер ответил с ошибкой:"+str(res))
    else:
        logger.info('Voice successfully passed to server with result: %s', str(res))
        html_out = _result_to_html(res)
        if not html_out:
            update.message.reply_text('_Ничего не распозналось_.', parse_mode='Markdown')
        else:
            update.message.reply_text(html_out, quote=False, parse_mode='HTML')

    return ConversationHandler.END


def recognize_speech_file(bot, update):
    """Recognize an attached audio file and show the text in a status message.

    A "Processing..." message is sent first and then edited in place with
    the downloaded file name and finally with the result or the error.

    Args:
        bot: bot object used to fetch the file.
        update: incoming update carrying a document, audio or voice
            attachment.

    Returns:
        ``RECOGNIZE_SPEECH`` when there is no usable attachment or its
        extension is not supported, ``ConversationHandler.END`` after a
        recognition attempt, or ``None`` when ``check_users`` asks to stop.
    """
    if check_users(update):
        return
    logger = logging.getLogger(__name__)

    wrong_format_text = 'Файл не того формата. Прикрепите файл wav,mp3,m4a,ogg,amr,opus или перешлите сюда запись голоса из чата. Можете также отменить операцию: /cancel.'

    # This handler is registered for documents, audio and voice messages,
    # so take whichever attachment the message actually carries.
    message = update.message
    attachment = message.document or message.audio or message.voice
    if attachment is None:
        logger.info('No attachment in the message. Repeating.')
        message.reply_text(wrong_format_text)
        return RECOGNIZE_SPEECH

    msg = message.reply_text('Processing...')
    voice_file = bot.get_file(attachment.file_id)
    filename = voice_file.download()
    msg2 = msg.edit_text('Processing: {}'.format(filename))

    filenam, extension = os.path.splitext(filename)
    ext = extension[1:]  # drop the leading dot
    logger.info("Sending information to server: {}, {}.".format(filenam, ext))

    if ext.lower() not in ['wav', 'mp3', 'm4a', 'ogg', 'amr', 'opus']:
        logger.info('File is not recognized extension. Repeating.')
        message.reply_text(wrong_format_text)
        return RECOGNIZE_SPEECH

    res = stt_client.recognize_speech(filename=filenam, extension=ext)

    if 'message' in res:
        logger.error('Error from server: %s', res['message'])
        msg2.edit_text("Сервер ответил с ошибкой:"+str(res['message']))
    else:
        logger.info('Voice successfully passed to server with result: %s', str(res))
        html_out = _result_to_html(res)
        if not html_out:
            msg2.edit_text('_Ничего не распозналось_.', parse_mode='Markdown')
        else:
            # NOTE(review): quote=False is kept from the original call;
            # confirm whether edit_text gives it any meaning.
            msg2.edit_text(html_out, quote=False, parse_mode='HTML')

    return ConversationHandler.END
322 | + | |
def donate(bot, update):
    """Send a sample invoice for 80 roubles to the chat of *update*.

    The provider token is an empty placeholder; see
    https://core.telegram.org/bots/payments#getting-a-token for how to
    obtain one.
    """
    price_roubles = 80

    # Positional order: chat id, title, description, payload, provider
    # token, start parameter, currency, prices.  Further options such as
    # need_name / need_phone_number / need_email / need_shipping_address /
    # is_flexible may be passed as keywords.
    bot.send_invoice(
        update.message.chat_id,
        "Payment Example",
        "Payment Example using python-telegram-bot",
        "Custom-Payload",  # payload to recognize the donation from this bot
        "",
        "test-payment",
        "RUB",
        # price * 100 so as to include 2 decimal points
        [LabeledPrice("Test", price_roubles * 100)],
    )
343 | + | |
344 | +# finally, after contacting the payment provider... | |
def successful_payment_callback(bot, update):
    """Thank the user after a successful payment has been received."""
    thanks = "Спасибо, что поддерживаете сибирскую науку!"
    update.message.reply_text(thanks)
348 | + | |
349 | + | |
def main():
    """Create the bot, register every handler and poll until stopped."""
    # The placeholder has to be replaced with the bot's API token.
    updater = Updater("<put here bot API>")
    dispatcher = updater.dispatcher

    # Conversation that accepts voice messages and audio attachments, both
    # as entry points and while waiting in the RECOGNIZE_SPEECH state.
    entry_points = [
        MessageHandler(Filters.voice, recognize_speech),
        MessageHandler(
            Filters.voice | Filters.document.category('audio') | Filters.audio,
            recognize_speech_file),
    ]
    waiting_handlers = [
        MessageHandler(Filters.voice, recognize_speech),
        MessageHandler(
            Filters.audio | Filters.voice | Filters.document.category('audio'),
            recognize_speech_file),
        CommandHandler('cancel', cancel),
    ]
    recognize_handler = ConversationHandler(
        entry_points=entry_points,
        states={RECOGNIZE_SPEECH: waiting_handlers},
        fallbacks=[CommandHandler('cancel', cancel)],
    )

    # Handlers are registered in this order: commands, the conversation,
    # then the payment confirmation.
    for command, callback in (("start", start), ("help", help), ("donate", donate)):
        dispatcher.add_handler(CommandHandler(command, callback))
    dispatcher.add_handler(recognize_handler)
    dispatcher.add_handler(
        MessageHandler(Filters.successful_payment, successful_payment_callback))

    # Log all errors.
    dispatcher.add_error_handler(error)

    updater.start_polling()

    # start_polling() does not block; idle() keeps the process running until
    # Ctrl-C or SIGINT / SIGTERM / SIGABRT and then stops the bot gracefully.
    updater.idle()


if __name__ == '__main__':
    main()
Please
register
or
login
to post a comment