# stt_bot_server.py 15.1 KB
#
# Raw Blame History Permalink

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# SDAML, NSU, 2017, 2021
# Evgeniy Pavlovskiy
"""
This Bot uses the Updater class to handle the bot.

Then, the bot is started and runs until we press Ctrl-C on the command line.

Usage:
...
Press Ctrl-C on the command line or send a signal to the process to stop the
bot.
"""

from telegram import (ReplyKeyboardMarkup, ReplyKeyboardRemove, MessageEntity, LabeledPrice)
from telegram.ext import (Updater, CommandHandler, MessageHandler, Filters,
                          ConversationHandler, RegexHandler)
import soundfile as sf
import logging

from html import escape
import sys
import codecs
import os

# Enable logging.
# Records of level INFO and above are written to the file 'stt_bot.log'
# (a relative path, so it is resolved against the working directory).
# Each record carries the timestamp, logger name, calling function name,
# level and message.
logging.basicConfig(format='%(asctime)s - %(name)s (%(funcName)s)- %(levelname)s - %(message)s',
                    level=logging.INFO,
                    filename = u'stt_bot.log')

# Module-level logger; several handlers below also re-fetch the same logger
# locally via logging.getLogger(__name__).
logger = logging.getLogger(__name__)

from stt_voice import *

# Key handed to the speech service client.
# NOTE(review): '1' looks like a placeholder value -- confirm with the service owner.
subscription_key = '1'
# Client used by the recognition handlers (they call stt_client.recognize_speech).
# Speech_Client is presumably provided by the `from stt_voice import *` above -- TODO confirm.
stt_client = Speech_Client(url="http://172.16.71.234", port=7999, subscription_key=subscription_key)

# Conversation state identifiers (0..9). In the code visible here only
# RECOGNIZE_SPEECH is actually referenced.
NAME, BIRTHDATE, TAGS, ADD_PROFILE, ENROLL_PROFILE, \
ENROLL_VOICE, IDENTIFY_VOICE, KEY_ENTRY, DELETE_PROFILE, \
RECOGNIZE_SPEECH = range(10)

# Telegram user ids that check_users() compares the message sender against.
allowed_users = [255603267 # euxsun
                ]

def check_users(update):
    """Log the sender of *update* and tell the caller whether to refuse it.

    Handlers use this as ``if check_users(update): return``, i.e. a truthy
    result means "stop processing this update".

    NOTE(review): the allow-list is currently NOT enforced. The function
    always returns ``False`` (the ``not allowed`` result is switched off), so
    every user is served and users missing from ``allowed_users`` are only
    logged. Return ``not allowed`` instead to restrict the bot again.

    :param update: incoming update; ``update.message.from_user.id`` is read.
    :return: always ``False``.
    """
    user = update.message.from_user

    logger = logging.getLogger(__name__)
    logger.info('User %s trying to use this bot.', user.id)

    allowed = user.id in allowed_users
    if not allowed:
        # Log only: the user-facing refusal is intentionally disabled.
        logger.info('User %s not allowed to use this bot. Write to @EuXsun to allow.',
                    user.id)
    return False  # not allowed

def readable_profile_info(info):
    """Format a profile mapping as a single human-readable line.

    The line looks like ``/<id>: <name>(<gender>), <birth_date> (<tags>).``.
    If *info* lacks a field, or a field cannot be concatenated as text,
    ``str(info)`` is returned instead.
    """
    try:
        header = '/' + str(info['id']) + ': ' + info['name']
        header = header + '(' + info['gender'] + '), '
        details = info['birth_date'] + ' (' + info['tags'] + ').'
    except Exception:
        # Best-effort fallback: show whatever was received.
        return str(info)
    return header + details

def start(bot, update):
    """Handle the /start command: record it in the log.

    Nothing is sent back to the user. Returns ``None`` in every case.
    """
    if not check_users(update):
        logging.getLogger(__name__).info("/start command sent.")
    # NOTE: an earlier, disabled draft listed speaker profiles here
    # (spk_client.get_profiles() / spk_client.get_profile_info(p)) and replied
    # with readable_profile_info(info) for each profile.


def help(bot, update):
    """Handle the /help command: reply with the usage text (HTML formatted).

    The text is assembled from adjacent string literals, so every paragraph
    must end with its own separator; otherwise neighbouring sentences are
    glued together in the message the user sees.
    """
    update.message.reply_text(
        'Добро пожаловать в @SpeechToTextNsuAi_bot!\n\n'
        'Отправьте голосовое сообщение или аудиофайл и бот превратит его в текст, расставив знаки препинания и выделив следующие сущности: <i>человек</i>, <b>местоположение</b>, <u>организация</u>.\n\n'
        'Обрабатываются записи не более 5 минут.\n\n'
        '/help - настоящая справка.\n\n'
        '/cancel - отмена выбранного действия.\n\n'
        'Это сибирская ❄️ разработка лаборатории аналитики потоковых данных и машинного обучения НГУ (https://bigdata.nsu.ru).',
        parse_mode='HTML'
    )


def cancel(bot, update):
    """Handle /cancel: confirm the cancellation and end the conversation.

    Logs the sender's first name, removes any custom reply keyboard and
    returns ``ConversationHandler.END``. Returns ``None`` without doing
    anything when ``check_users`` refuses the update.
    """
    if check_users(update):
        return None

    message = update.message
    logger.info("User %s canceled the process.", message.from_user.first_name)
    message.reply_text(
        'Отменяю последнюю операцию.',
        reply_markup=ReplyKeyboardRemove(),
    )
    return ConversationHandler.END

def error(bot, update, error):
    """Log an error raised while processing *update* and notify the user if possible.

    :param bot: bot instance (unused).
    :param update: the update that caused the error; it may be ``None`` or
        carry no message when the error is not tied to a chat message.
    :param error: the raised exception.
    """
    logger = logging.getLogger(__name__)
    logger.error('Update "{}" caused error "{}"'.format(str(update), str(error)))

    # Only answer when there is a message to answer to; otherwise this error
    # handler would itself fail with AttributeError.
    message = getattr(update, 'message', None)
    if message is not None:
        message.reply_text('Error: {}.'.format(str(error)))


def recognize(but, update):
    """Handle an explicit /recognize request: ask the user to record speech.

    Returns the ``RECOGNIZE_SPEECH`` state so the next message is treated as
    the recording, or ``None`` when ``check_users`` refuses the update.
    """
    if not check_users(update):
        logging.getLogger(__name__).info('User selected /recognize command.')
        update.message.reply_text('Запишите не более 5 минут речи.')
        return RECOGNIZE_SPEECH
    return None

def _parse_entity(text, entity):
    """Return the part of *text* covered by *entity*.

    ``entity.offset`` and ``entity.length`` are used directly as Python string
    indices (the former UTF-16 re-encoding variant is disabled).
    """
    start = entity.offset
    stop = start + entity.length
    return text[start:stop]

def parse_ent(message_text, entities, urled=False):
    """Render *message_text* as Telegram HTML, wrapping each entity in its tag.

    Text outside and inside entities is HTML-escaped. Entity offsets and
    lengths are interpreted as Python string indices.

    :param message_text: plain text, or ``None``.
    :param entities: iterable of hashable entity objects exposing ``type``,
        ``offset`` and ``length`` (and ``url`` for text links).
    :param urled: when true, plain URL entities are turned into ``<a>`` links;
        by default they are emitted as escaped text.
    :return: the HTML string, or ``None`` when *message_text* is ``None``.
    """
    if message_text is None:
        return None

    html_text = ''
    last_offset = 0
    entities_dict = {
        entity: _parse_entity(message_text, entity)
        for entity in entities
    }

    for entity, text in sorted(entities_dict.items(), key=(lambda item: item[0].offset)):
        if entity.offset < last_offset:
            # Overlapping/nested entity: its text has already been emitted.
            # Skipping it avoids duplicating text and rewinding the cursor.
            continue

        text = escape(text)

        if entity.type == MessageEntity.TEXT_LINK:
            insert = '<a href="{}">{}</a>'.format(entity.url, text)
        elif (entity.type == MessageEntity.URL) and urled:
            insert = '<a href="{0}">{0}</a>'.format(text)
        elif entity.type == MessageEntity.BOLD:
            insert = '<b>' + text + '</b>'
        elif entity.type == MessageEntity.ITALIC:
            insert = '<i>' + text + '</i>'
        elif entity.type == 'underline':
            insert = '<u>' + text + '</u>'
        elif entity.type == MessageEntity.CODE:
            insert = '<code>' + text + '</code>'
        elif entity.type == MessageEntity.PRE:
            insert = '<pre>' + text + '</pre>'
        else:
            insert = text

        # Plain text between the previous entity and this one, then the entity.
        html_text += escape(message_text[last_offset:entity.offset]) + insert
        last_offset = entity.offset + entity.length

    # Remaining plain text after the last entity.
    html_text += escape(message_text[last_offset:])
    return html_text

def _ner_entities(sentence):
    """Turn the named-entity spans of one NER sentence into MessageEntity markers.

    Spans are read from ``sentence['named_entities'][LABEL]`` as
    ``(start, end)`` pairs. LOCATION becomes bold, PERSON italic and ORG
    underline; absent labels contribute nothing.
    """
    styles = (
        ('LOCATION', MessageEntity.BOLD),
        ('PERSON', MessageEntity.ITALIC),
        ('ORG', 'underline'),
    )
    named = sentence.get('named_entities') or {}
    entities = []
    for label, style in styles:
        for span in named.get(label) or ():
            entities.append(
                MessageEntity(type=style, offset=span[0], length=span[1] - span[0])
            )
    return entities


def _recognition_html(res):
    """Render a successful server response as Telegram HTML.

    Uses ``res['ner']`` (sentences with entity spans) when present, otherwise
    ``res['stt.punct']`` (punctuated sentences). Returns ``''`` when neither
    key is present or the recognized text is blank.
    """
    logger = logging.getLogger(__name__)

    if 'ner' in res:
        parts = []
        ent_cnt = 0
        for sentence in res['ner']:
            entities = _ner_entities(sentence)
            parts.append(parse_ent(str(sentence['text']), entities))
            ent_cnt += len(entities)
        # strip() so that whitespace-only output counts as "nothing recognized".
        html_out = ' '.join(parts).strip()
        logger.info('Found {} entities'.format(ent_cnt))
        logger.info('Out html: {}'.format(html_out))
        return html_out

    if 'stt.punct' in res:
        # The result is sent with parse_mode='HTML', so raw text must be escaped.
        return ' '.join(escape(s['sent']) for s in res['stt.punct']).strip()

    return ''


def recognize_speech(bot, update):
    """Recognize a voice message and reply with the HTML-formatted transcript.

    The voice note is downloaded to ``tmp.ogg`` and passed to the speech
    server. Returns ``RECOGNIZE_SPEECH`` when the message carries no voice
    note (so the user can retry), ``ConversationHandler.END`` after a server
    answer, and ``None`` when ``check_users`` refuses the update.
    """
    if check_users(update):
        return
    logger = logging.getLogger(__name__)

    if not update.message.voice:
        logger.info('No voice in the message. Repeating.')
        update.message.reply_text('Это не запись голоса. Пожалуйста повторите, нажав кнопку микрофона (для записи голоса), или перешлите сюда запись голоса из чата. Можете также отменить операцию: /cancel.')
        return RECOGNIZE_SPEECH

    voice_file = bot.get_file(update.message.voice.file_id)
    voice_file.download('tmp.ogg')

    logger.info("Sending information to server.")
    res = stt_client.recognize_speech(filename='tmp', extension='ogg')

    if 'message' in res:
        # The presence of 'message' marks an error answer; log that same key.
        logger.error('Error from server: %s', res['message'])
        update.message.reply_text("Сервер ответил с ошибкой:"+str(res))
    else:
        logger.info('Voice successfully passed to server with result: %s', str(res))
        html_out = _recognition_html(res)
        if html_out:
            update.message.reply_text(html_out, quote=False, parse_mode='HTML')
        else:
            update.message.reply_text('_Ничего не распозналось_.', parse_mode='Markdown')

    return ConversationHandler.END


def recognize_speech_file(bot, update):
    """Recognize an attached audio file and show the transcript in a status message.

    A "Processing..." reply is posted and later edited to hold the result or
    the server error. Returns ``RECOGNIZE_SPEECH`` when there is no usable
    attachment or the file extension is unsupported, ``ConversationHandler.END``
    after a server answer, and ``None`` when ``check_users`` refuses the update.

    NOTE(review): the downloaded file is left on disk after recognition.
    """
    if check_users(update):
        return
    logger = logging.getLogger(__name__)
    message = update.message

    # main() registers this handler for documents, audio and voice messages,
    # so the file may sit in any of these attributes, not only ``document``.
    attachment = message.document or message.audio or message.voice
    if attachment is None:
        logger.info('No audio attachment in the message. Repeating.')
        message.reply_text('Файл не того формата. Прикрепите файл wav,mp3,m4a,ogg,amr,opus или перешлите сюда запись голоса из чата. Можете также отменить операцию: /cancel.')
        return RECOGNIZE_SPEECH

    msg = message.reply_text('Processing...')
    voice_file = bot.get_file(attachment.file_id)
    filename = voice_file.download()
    msg2 = msg.edit_text('Processing: {}'.format(filename))

    filenam, extension = os.path.splitext(filename)
    extension = extension[1:]  # drop the leading dot
    logger.info("Sending information to server: {}, {}.".format(filenam, extension))

    if extension.lower() not in ['wav', 'mp3', 'm4a', 'ogg', 'amr', 'opus']:
        logger.info('File is not recognized extension. Repeating.')
        message.reply_text('Файл не того формата. Прикрепите файл wav,mp3,m4a,ogg,amr,opus или перешлите сюда запись голоса из чата. Можете также отменить операцию: /cancel.')
        return RECOGNIZE_SPEECH

    res = stt_client.recognize_speech(filename=filenam, extension=extension)

    if 'message' in res:
        logger.error('Error from server: %s', res['message'])
        msg2.edit_text("Сервер ответил с ошибкой:"+str(res['message']))
    else:
        logger.info('Voice successfully passed to server with result: %s', str(res))
        html_out = _recognition_html(res)
        if html_out:
            # ``quote`` only applies to reply_* methods, not to message edits.
            msg2.edit_text(html_out, parse_mode='HTML')
        else:
            msg2.edit_text('_Ничего не распозналось_.', parse_mode='Markdown')

    return ConversationHandler.END

def donate(bot, update):
    """Handle /donate: send a sample 80 RUB invoice to the requesting chat."""
    # Price in roubles; multiplied by 100 below so as to include 2 decimal points.
    price = 80

    invoice_args = (
        update.message.chat_id,                 # chat_id
        "Payment Example",                      # title
        "Payment Example using python-telegram-bot",  # description
        # A payload chosen by us to recognize the donation as coming from this bot.
        "Custom-Payload",
        # Provider token, see https://core.telegram.org/bots/payments#getting-a-token
        "",
        "test-payment",                         # start_parameter
        "RUB",                                  # currency
        [LabeledPrice("Test", price * 100)],    # prices
    )

    # Optional flags: need_name=True, need_phone_number=True,
    # need_email=True, need_shipping_address=True, is_flexible=True
    bot.send_invoice(*invoice_args)

# finally, after contacting the payment provider...
def successful_payment_callback(bot, update):
    """Thank the user; main() registers this for successful-payment messages."""
    # Placeholder for any further post-payment processing.
    thanks = "Спасибо, что поддерживаете сибирскую науку!"
    update.message.reply_text(thanks)


def main():
    """Build the updater, register all handlers and poll until interrupted."""
    # Create the Updater with the bot's token (SpeakerIdentificatorBot).
    updater = Updater("<put here bot API>")
    dispatcher = updater.dispatcher

    # Conversation that turns voice notes / audio files into text.
    # Handler order matters: plain voice notes are tried first.
    recognize_handler = ConversationHandler(
        entry_points=[
            MessageHandler(Filters.voice, recognize_speech),
            MessageHandler(
                Filters.voice | Filters.document.category('audio') | Filters.audio,
                recognize_speech_file,
            ),
        ],
        states={
            RECOGNIZE_SPEECH: [
                MessageHandler(Filters.voice, recognize_speech),
                MessageHandler(
                    Filters.audio | Filters.voice | Filters.document.category('audio'),
                    recognize_speech_file,
                ),
                CommandHandler('cancel', cancel),
            ],
        },
        fallbacks=[CommandHandler('cancel', cancel)],
    )

    # Registration order is preserved: commands, the conversation, then the
    # payment confirmation.
    handlers = (
        CommandHandler("start", start),
        CommandHandler("help", help),
        CommandHandler("donate", donate),
        recognize_handler,
        MessageHandler(Filters.successful_payment, successful_payment_callback),
    )
    for handler in handlers:
        dispatcher.add_handler(handler)

    # Log all errors.
    dispatcher.add_error_handler(error)

    # start_polling() is non-blocking; idle() then blocks until Ctrl-C or
    # SIGINT/SIGTERM/SIGABRT and stops the bot gracefully.
    updater.start_polling()
    updater.idle()


# Run the bot only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()