Commit c20afed92e679c79f5622be057c66906df514035
1 parent
ee505ab7
Added sample bot server with required python libraries list
Showing
2 changed files
with
409 additions
and
0 deletions
requirements.txt
0 → 100644
1 | +APScheduler==3.6.3 | ||
2 | +certifi==2020.12.5 | ||
3 | +cffi==1.14.4 | ||
4 | +chardet==4.0.0 | ||
5 | +cryptography==3.3.1 | ||
6 | +decorator==4.4.2 | ||
7 | +future==0.18.2 | ||
8 | +idna==2.10 | ||
9 | +pycparser==2.20 | ||
10 | +python-telegram-bot==11.0.0 | ||
11 | +pytz==2020.5 | ||
12 | +requests==2.25.1 | ||
13 | +six==1.15.0 | ||
14 | +SoundFile==0.10.3.post1 | ||
15 | +tornado==6.1 | ||
16 | +tzlocal==2.1 | ||
17 | +urllib3==1.26.3 |
stt_bot_server.py
0 → 100644
1 | +#!/usr/bin/env python | ||
2 | +# -*- coding: utf-8 -*- | ||
3 | +# | ||
4 | +# SDAML, NSU, 2017, 2021 | ||
5 | +# Evgeniy Pavlovskiy | ||
6 | +""" | ||
7 | +This Bot uses the Updater class to handle the bot. | ||
8 | + | ||
9 | +Then, the bot is started and runs until we press Ctrl-C on the command line. | ||
10 | + | ||
11 | +Usage: | ||
12 | +... | ||
13 | +Press Ctrl-C on the command line or send a signal to the process to stop the | ||
14 | +bot. | ||
15 | +""" | ||
16 | + | ||
17 | +from telegram import (ReplyKeyboardMarkup, ReplyKeyboardRemove, MessageEntity, LabeledPrice) | ||
18 | +from telegram.ext import (Updater, CommandHandler, MessageHandler, Filters, | ||
19 | + ConversationHandler, RegexHandler) | ||
20 | +import soundfile as sf | ||
21 | +import logging | ||
22 | + | ||
23 | +from html import escape | ||
24 | +import sys | ||
25 | +import codecs | ||
26 | +import os | ||
27 | + | ||
# Enable logging: INFO and above goes to stt_bot.log next to the working directory.
logging.basicConfig(
    filename=u'stt_bot.log',
    level=logging.INFO,
    format='%(asctime)s - %(name)s (%(funcName)s)- %(levelname)s - %(message)s',
)

# Module-level logger shared by the handlers below.
logger = logging.getLogger(__name__)
34 | + | ||
35 | +from sdk_voice import * | ||
36 | +from stt_voice import * | ||
37 | + | ||
# Key passed to the speech-recognition client on construction.
subscription_key = '1'

# Client for the speech-to-text service (Speech_Client comes from the star imports above).
stt_client = Speech_Client(
    url="http://172.16.71.234",
    port=7999,
    subscription_key=subscription_key,
)

# Conversation state identifiers; the handlers visible in this file only use
# RECOGNIZE_SPEECH.
(
    NAME,
    BIRTHDATE,
    TAGS,
    ADD_PROFILE,
    ENROLL_PROFILE,
    ENROLL_VOICE,
    IDENTIFY_VOICE,
    KEY_ENTRY,
    DELETE_PROFILE,
    RECOGNIZE_SPEECH,
) = range(10)

# Telegram user ids consulted by check_users().
allowed_users = [
    255603267,  # euxsun
]
47 | + | ||
def check_users(update):
    """Log who is talking to the bot and whether they are on the allow-list.

    Args:
        update: incoming Telegram update; ``update.message.from_user.id`` is
            compared against the module-level ``allowed_users`` list.

    Returns:
        Always ``False``. Every caller does ``if check_users(update): return``,
        so a falsy result lets the request through: the allow-list is only
        reported in the log, it is not enforced. This mirrors the original
        behaviour (the user-facing refusal was commented out).
        NOTE(review): return ``user_id not in allowed_users`` here if the
        restriction is meant to be enforced.
    """
    user_id = update.message.from_user.id

    log = logging.getLogger(__name__)
    log.info('User {} trying to use this bot.'.format(user_id))

    if user_id not in allowed_users:
        # The original message said "now allowed", which reads as the opposite
        # of what is meant.
        log.info('User {} not allowed to use this bot. Write to @EuXsun to allow.'
                 .format(user_id))

    return False
61 | + | ||
def readable_profile_info(info):
    """Format a profile dict as a one-line human-readable summary.

    The summary looks like ``/<id>: <name>(<gender>), <birth_date> (<tags>).``.
    If *info* cannot be formatted that way (missing key, non-string field,
    not a mapping, ...) the plain ``str(info)`` is returned instead.
    """
    try:
        pieces = [
            '/', str(info['id']), ': ', info['name'],
            '(', info['gender'], '), ',
            info['birth_date'], ' (', info['tags'], ').',
        ]
        # join() rejects non-string pieces, just like '+' concatenation would.
        return ''.join(pieces)
    except Exception:
        return str(info)
69 | + | ||
def start(bot, update):
    """Handle the /start command.

    Runs the user check and, when it does not reject the caller, writes a log
    entry. No reply is sent to the user.

    The profile listing that used to follow (get_profiles / get_profile_info
    on a speaker client) is disabled and has been left out.
    """
    if not check_users(update):
        log = logging.getLogger(__name__)
        log.info("/start command sent.")
90 | + | ||
91 | + | ||
92 | + | ||
def help(bot, update):
    """Handle the /help command by replying with the usage text (HTML mode).

    Args:
        bot: the bot instance passed by the dispatcher (unused).
        update: incoming update; the reply is sent via ``update.message``.

    The text is assembled from adjacent string literals, so every paragraph
    must end with its own separator: previously the greeting and the footer
    were glued onto their neighbours, and "Отправьте" was misspelled.
    """
    update.message.reply_text(
        'Добро пожаловать в @SpeechToTextNsuAi_bot!\n\n'
        'Отправьте голосовое сообщение или аудиофайл и бот превратит его в текст, расставив знаки препинания и выделив следующие сущности: <i>человек</i>, <b>местоположение</b>, <u>организация</u>.\n\n'
        'Обрабатываются записи не более 5 минут.\n\n'
        '/help - настоящая справка.\n\n'
        '/cancel - отмена выбранного действия.\n\n'
        'Это сибирская ❄️ разработка лаборатории аналитики потоковых данных и машинного обучения НГУ (https://bigdata.nsu.ru).',
        parse_mode='HTML'
    )
103 | + | ||
104 | + | ||
def cancel(bot, update):
    """Handle /cancel: acknowledge, drop any custom keyboard, end the conversation.

    Returns ``ConversationHandler.END`` (or ``None`` when the user check
    rejects the caller).
    """
    if check_users(update):
        return None

    sender = update.message.from_user
    logger.info("User %s canceled the process." % sender.first_name)

    keyboard_off = ReplyKeyboardRemove()
    update.message.reply_text('Отменяю последнюю операцию.', reply_markup=keyboard_off)
    return ConversationHandler.END
114 | + | ||
def error(bot, update, error):
    """Log an error raised while handling an update and report it to the user.

    Args:
        bot: the bot instance passed by the dispatcher (unused).
        update: the update being processed; may be ``None`` or carry no
            ``message`` when the error is not tied to a chat message.
        error: the exception that was raised.

    The reply is skipped when there is no message to answer, so the error
    handler cannot itself fail with ``AttributeError``.
    """
    log = logging.getLogger(__name__)
    log.error('Update "{}" caused error "{}"'.format(str(update), str(error)))

    message = getattr(update, 'message', None)
    if message is not None:
        message.reply_text('Error: {}.'.format(str(error)))
118 | + | ||
119 | + | ||
def recognize(but, update):
    """Handle /recognize: ask the user to record speech.

    Returns the ``RECOGNIZE_SPEECH`` state so the conversation waits for the
    recording (or ``None`` when the user check rejects the caller).
    """
    if check_users(update):
        return None

    logging.getLogger(__name__).info('User selected /recognize command.')
    update.message.reply_text('Запишите не более 5 минут речи.')
    return RECOGNIZE_SPEECH
130 | + | ||
def _parse_entity(text, entity):
    """Return the substring of *text* covered by *entity*.

    ``entity.offset`` and ``entity.length`` are applied as plain Python string
    indices (no UTF-16 conversion is performed).
    """
    start = entity.offset
    return text[start:start + entity.length]


def _entity_to_html(entity, text, urled):
    """Wrap the already-escaped *text* in the HTML tag matching ``entity.type``."""
    kind = entity.type
    if kind == MessageEntity.TEXT_LINK:
        return '<a href="{}">{}</a>'.format(entity.url, text)
    if kind == MessageEntity.URL and urled:
        return '<a href="{0}">{0}</a>'.format(text)

    tag = {
        MessageEntity.BOLD: 'b',
        MessageEntity.ITALIC: 'i',
        'underline': 'u',
        MessageEntity.CODE: 'code',
        MessageEntity.PRE: 'pre',
    }.get(kind)
    if tag is None:
        # Unknown entity types (and plain URLs when urled is False) stay as text.
        return text
    return '<{0}>{1}</{0}>'.format(tag, text)


def parse_ent(message_text, entities, urled=False):
    """Render *message_text* as HTML, marking up the spans given by *entities*.

    Args:
        message_text: plain text to render; ``None`` is passed through.
        entities: iterable of entity objects exposing ``type``, ``offset`` and
            ``length`` (and ``url`` for text links). ``None`` is treated as
            "no entities".
        urled: when true, plain URL entities are turned into ``<a>`` links.
            The original body referenced this name without defining it, so
            any URL entity raised ``NameError``; it is now a parameter that
            defaults to ``False``.

    Returns:
        The HTML string with text outside and inside the entities escaped,
        or ``None`` when *message_text* is ``None``.
    """
    if message_text is None:
        return None

    # Keyed by entity, as before, so entities comparing equal are rendered once.
    covered = {
        entity: _parse_entity(message_text, entity)
        for entity in (entities or ())
    }

    pieces = []
    last_offset = 0
    for entity, text in sorted(covered.items(), key=lambda item: item[0].offset):
        # Plain text between the previous entity and this one.
        pieces.append(escape(message_text[last_offset:entity.offset]))
        pieces.append(_entity_to_html(entity, escape(text), urled))
        last_offset = entity.offset + entity.length

    # Whatever follows the last entity.
    pieces.append(escape(message_text[last_offset:]))
    return ''.join(pieces)
187 | + | ||
def _ner_to_html(sentences):
    """Render the server's ``ner`` sentence list as HTML.

    Each sentence is read as a dict with a ``'text'`` entry and an optional
    ``'named_entities'`` mapping from label to a list of ``[start, end]``
    spans. LOCATION spans become bold, PERSON italic and ORG underlined.

    Returns:
        ``(html, entity_count)``; every sentence is followed by one space.
    """
    styles = (
        ('LOCATION', MessageEntity.BOLD),
        ('PERSON', MessageEntity.ITALIC),
        ('ORG', 'underline'),
    )
    parts = []
    entity_count = 0
    for sentence in sentences:
        named = sentence.get('named_entities') or {}
        entities = []
        for label, style in styles:
            for span in named.get(label, ()):
                entities.append(
                    MessageEntity(type=style, offset=span[0], length=span[1] - span[0])
                )
        parts.append(parse_ent(str(sentence['text']), entities) + ' ')
        entity_count += len(entities)
    return ''.join(parts), entity_count


def _result_to_html(res):
    """Turn a successful server response into HTML ('' when nothing usable)."""
    log = logging.getLogger(__name__)
    if 'ner' in res:
        html_out, entity_count = _ner_to_html(res['ner'])
        log.info('Found {} entities'.format(entity_count))
        log.info('Out html: {}'.format(html_out))
        return html_out
    if 'stt.punct' in res:
        # The text is sent with parse_mode='HTML', so it must be escaped just
        # like the 'ner' branch does through parse_ent().
        return ' '.join(escape(s['sent']) for s in res['stt.punct'])
    return ''


def recognize_speech(bot, update):
    """Recognize a Telegram voice message and reply with the marked-up text.

    The voice note is downloaded to ``tmp.ogg`` and handed to ``stt_client``.
    A response containing a ``'message'`` key is treated as a server error.

    Returns:
        ``RECOGNIZE_SPEECH`` when the message carries no voice note (the user
        is asked to retry), ``ConversationHandler.END`` once a reply was sent,
        or ``None`` when the user check rejects the caller.
    """
    if check_users(update):
        return
    logger = logging.getLogger(__name__)

    voice = update.message.voice
    if not voice:
        logger.info('No voice in the message. Repeating.')
        update.message.reply_text('Это не запись голоса. Пожалуйста повторите, нажав кнопку микрофона (для записи голоса), или перешлите сюда запись голоса из чата. Можете также отменить операцию: /cancel.')
        return RECOGNIZE_SPEECH

    # NOTE(review): fixed file name in the working directory; simultaneous
    # requests would overwrite each other's download.
    bot.get_file(voice.file_id).download('tmp.ogg')

    logger.info("Sending information to server.")
    res = stt_client.recognize_speech(filename='tmp', extension='ogg')

    if 'message' in res:
        # Was res['error'], which raised KeyError on exactly this branch.
        logger.error('Error from server: %s', res['message'])
        update.message.reply_text("Сервер ответил с ошибкой:" + str(res))
        return ConversationHandler.END

    logger.info('Voice successfully passed to server with result: %s', res)
    html_out = _result_to_html(res)
    if html_out.strip():
        update.message.reply_text(html_out, quote=False, parse_mode='HTML')
    else:
        # Also covers whitespace-only output, which cannot be sent as a message.
        update.message.reply_text('_Ничего не распозналось_.', parse_mode='Markdown')

    return ConversationHandler.END


def recognize_speech_file(bot, update):
    """Recognize an attached audio file and show the result in a status message.

    The attachment is taken from the message's document, audio or voice field
    (the handler is registered for all three), downloaded under its server
    file name, checked against the supported extensions and passed to
    ``stt_client``. Progress and the final text are written by editing the
    'Processing...' reply.

    Returns:
        ``RECOGNIZE_SPEECH`` when there is no usable attachment or the
        extension is unsupported, ``ConversationHandler.END`` once the result
        was shown, or ``None`` when the user check rejects the caller.
    """
    if check_users(update):
        return
    logger = logging.getLogger(__name__)

    supported = ('wav', 'mp3', 'm4a', 'ogg', 'amr', 'opus')
    wrong_format = 'Файл не того формата. Прикрепите файл wav,mp3,m4a,ogg,amr,opus или перешлите сюда запись голоса из чата. Можете также отменить операцию: /cancel.'

    message = update.message
    # Only message.document was read before, which failed for audio messages.
    attachment = message.document or message.audio or message.voice
    if not attachment:
        logger.info('No file in the message. Repeating.')
        message.reply_text(wrong_format)
        return RECOGNIZE_SPEECH

    msg = message.reply_text('Processing...')
    filename = bot.get_file(attachment.file_id).download()
    msg2 = msg.edit_text('Processing: {}'.format(filename))

    filenam, extension = os.path.splitext(filename)
    extension = extension[1:]  # drop the leading dot
    logger.info("Sending information to server: {}, {}.".format(filenam, extension))

    if extension.lower() not in supported:
        logger.info('File is not recognized extension. Repeating.')
        message.reply_text(wrong_format)
        return RECOGNIZE_SPEECH

    res = stt_client.recognize_speech(filename=filenam, extension=extension)

    if 'message' in res:
        logger.error('Error from server: %s', res['message'])
        msg2.edit_text("Сервер ответил с ошибкой:" + str(res['message']))
        return ConversationHandler.END

    logger.info('Voice successfully passed to server with result: %s', res)
    html_out = _result_to_html(res)
    if html_out.strip():
        # 'quote' only applies to replies, so it is no longer passed to edit_text.
        msg2.edit_text(html_out, parse_mode='HTML')
    else:
        msg2.edit_text('_Ничего не распозналось_.', parse_mode='Markdown')

    return ConversationHandler.END
322 | + | ||
def donate(bot, update):
    """Handle /donate by sending a sample invoice to the requesting chat.

    The provider token is left empty here; see
    https://core.telegram.org/bots/payments#getting-a-token for obtaining one.
    """
    # Price in roubles; the invoice amount is given in hundredths (price * 100).
    price = 80
    line_items = [LabeledPrice("Test", price * 100)]

    # Optionally pass need_name=True, need_phone_number=True,
    # need_email=True, need_shipping_address=True, is_flexible=True
    bot.send_invoice(
        chat_id=update.message.chat_id,
        title="Payment Example",
        description="Payment Example using python-telegram-bot",
        # Payload chosen so the donation can be recognized as coming from this bot.
        payload="Custom-Payload",
        provider_token="",
        start_parameter="test-payment",
        currency="RUB",
        prices=line_items,
    )
343 | + | ||
344 | +# finally, after contacting the payment provider... | ||
def successful_payment_callback(bot, update):
    """Thank the user once a successful-payment message arrives."""
    thanks = "Спасибо, что поддерживаете сибирскую науку!"
    update.message.reply_text(thanks)
348 | + | ||
349 | + | ||
def main():
    """Create the bot, register all handlers and poll until interrupted."""
    # Create the Updater and pass it your bot's token.
    updater = Updater("<put here bot API>")
    dispatcher = updater.dispatcher

    def voice_handler():
        # Voice notes recorded in the chat.
        return MessageHandler(Filters.voice, recognize_speech)

    def audio_file_handler():
        # Audio sent as a file, an audio message or a voice note.
        return MessageHandler(
            Filters.voice | Filters.document.category('audio') | Filters.audio,
            recognize_speech_file,
        )

    # Conversation that turns speech into text; /cancel leaves it.
    recognize_handler = ConversationHandler(
        entry_points=[voice_handler(), audio_file_handler()],
        states={
            RECOGNIZE_SPEECH: [
                voice_handler(),
                audio_file_handler(),
                CommandHandler('cancel', cancel),
            ],
        },
        fallbacks=[CommandHandler('cancel', cancel)],
    )

    # Registration order is kept: commands, the conversation, then the
    # successful-payment notification.
    for handler in (
        CommandHandler("start", start),
        CommandHandler("help", help),
        CommandHandler("donate", donate),
        recognize_handler,
        MessageHandler(Filters.successful_payment, successful_payment_callback),
    ):
        dispatcher.add_handler(handler)

    # Log all errors.
    dispatcher.add_error_handler(error)

    # Start the bot.
    updater.start_polling()

    # Run the bot until you press Ctrl-C or the process receives SIGINT,
    # SIGTERM or SIGABRT. This should be used most of the time, since
    # start_polling() is non-blocking and will stop the bot gracefully.
    updater.idle()


if __name__ == '__main__':
    main()
Please
register
or
login
to post a comment