Added missing stt_voice file

Evgeniy Pavlovskiy
1 parent c20afed9
Showing 1 changed file with 132 additions and 0 deletions
stt_voice.py
--- a/stt_voice.py 0 → 100644
View file @96302cd
+++ b/stt_voice.py 0 → 100644
View file @96302cd
+import base64
+
+import requests
+import json
+import re
+import datetime
+import ast
+
+import logging
+
+import soundfile as sf
+import codecs
+import subprocess
+import copy
+
+# Enable logging
+logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+                    level=logging.DEBUG,
+                    filename = u'stt_voice.log')
+
+
+logger = logging.getLogger(__name__)
+
+class Speech_Client:
+    def __init__(self, url, port, subscription_key):
+        """
+        Initialize SDK client for simple work with API server using python.
+        :param subscription_key: your subscription key that you can get from our manager.
+        :param url: API server url. For example: "https://bigdata.nsu.ru"
+        :param port: API server port. For example: "5044"
+
+        :type self: Biometric_Client
+        :type subscription_key: str
+        :type port: int
+        :type url: str
+        """
+        self.url = url
+        self.port = port
+        self.subscription_key = subscription_key
+        self.connection_string = url + ":" + str(port) + "/" #+ subscription_key + "/"
+        self.client = requests.session()
+        self.client.verify = False #"./ssl-cert-snakeoil1.key.cer"
+        #self.client.cert = "./ssl-cert-snakeoil.key"
+    
+    def change_subscription_key(self, subscription_key):
+        self.subscription_key = subscription_key
+        self.connection_string = self.url + ":" + str(self.port) + "/" # + self.subscription_key + "/"
+        self.client = requests.session()
+        self.client.verify = False
+        
+
+    @staticmethod                   # TODO: multiple dicts in list length
+    def getJSON(response):
+        if response:
+            return response.json()
+        else:
+            raise TypeError("no service output json") #OR VALUE ERROR?
+
+
+    def recognize_speech(self, filename, extension):
+        """
+        Recognize text from speech.
+        :param filename: name of audio file with person voice. String value.
+        :type filename: str
+        :param extension: audio file extension. Supported extensions:
+        * WAV
+        * MP3
+        * OGG
+        * FLAC
+        :type extension: str
+        :return:
+        * message - error condition. 1 - result with error. 0 - result without error
+        * result - if no error: list of dictionaries. Each item of list contains:
+            ** "stt.punct" - list of recognized sentences.
+            ** "sent" - float value between 0 and ~110. Bigger value mean that identifying voice most look like sample voice
+            that was enrolled to profile before.
+        If have error then return error string.
+        :rtype: dict
+        """
+        logger = logging.getLogger(__name__)
+        
+        try:
+            stt_connection_string = self.url + ":8000" + "/recognize"
+            sttp_connection_string = self.url + ":8001" + "/recognize"
+            ner_connection_string = self.url + ":8002" + "/recognize"
+            
+
+            # transform to 8khz WAV 16bit PCM
+            subprocess.run(["ffmpeg",'-y','-t','300','-i', filename+'.'+extension, '-ar','8000', '-ac','1', 'tmp.wav'])
+
+#            data, samplerate = sf.read(filename+'.'+extension)
+#            sf.write(filename+'.wav', data, 8000)
+            # run STT
+            f = open('tmp.wav', 'rb')
+            files = {'wav':('tmp.wav',f,'audio/wave'),
+                     'json':('tmp.json',open('tmp.json','rb'),'application/json')}
+
+            logger.info('POST request to connection string: %s', stt_connection_string)
+            r = requests.post(stt_connection_string,files=files)
+            logger.info('Got response: {}'.format(r.text))
+            stt_json = self.getJSON(r)
+            r.close()
+            with codecs.open('tmp1.json', mode='w', encoding='utf-8', errors='ignore') as f:
+                json.dump(stt_json, fp=f, ensure_ascii=False)
+
+            # run STT.PUNCT
+            f = open('tmp.wav', 'rb')
+            files = {'json':('tmp1.json',open('tmp1.json','rb'),'application/json')}
+
+            logger.info('POST request to connection string: %s', sttp_connection_string)
+            r = requests.post(sttp_connection_string,files=files)
+            sttp_json = self.getJSON(r)
+            r.close()
+            
+            logger.info('Server response: {}'.format(sttp_json))
+            
+            # run NER
+            logger.info('POST request to connection string: %s', ner_connection_string)
+            sttp_json['stt.dictors']=copy.deepcopy(sttp_json['stt.punct'])
+            for s in sttp_json['stt.dictors']:
+                s['text']=s['sent']
+            r = requests.post(ner_connection_string,json=sttp_json)
+            ner_json = self.getJSON(r)
+            r.close()
+            
+            logger.info('Server response: {}'.format(ner_json))
+            
+            result = ner_json
+            return result
+        except Exception as e:
+            return {"message": str(e)+"; " + str(e.__doc__)}
+