Showing
1 changed file
with
132 additions
and
0 deletions
stt_voice.py
0 → 100644
1 | +import base64 | |
2 | + | |
3 | +import requests | |
4 | +import json | |
5 | +import re | |
6 | +import datetime | |
7 | +import ast | |
8 | + | |
9 | +import logging | |
10 | + | |
11 | +import soundfile as sf | |
12 | +import codecs | |
13 | +import subprocess | |
14 | +import copy | |
15 | + | |
# Route every record (DEBUG and above) from this process to a log file
# in the current working directory.
logging.basicConfig(
    filename='stt_voice.log',
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
23 | + | |
class Speech_Client:
    """Client for a chain of speech services: STT, punctuation and NER.

    The three services are addressed as ``<url>:8000/recognize``,
    ``<url>:8001/recognize`` and ``<url>:8002/recognize``.
    """

    def __init__(self, url, port, subscription_key):
        """
        Initialize SDK client for simple work with API server using python.

        :param url: API server url. For example: "https://bigdata.nsu.ru"
        :param port: API server port. For example: "5044"
        :param subscription_key: your subscription key that you can get from our manager.

        :type url: str
        :type port: int
        :type subscription_key: str
        """
        self.url = url
        self.port = port
        self.subscription_key = subscription_key
        self.client = None
        self._reset_connection()

    def change_subscription_key(self, subscription_key):
        """
        Store a new subscription key and re-create the HTTP session.

        :param subscription_key: the new subscription key.
        :type subscription_key: str
        """
        self.subscription_key = subscription_key
        self._reset_connection()

    def _reset_connection(self):
        """Rebuild ``connection_string`` and open a fresh ``requests`` session."""
        # Close the session being replaced so its pooled connections are released.
        previous = getattr(self, "client", None)
        if previous is not None:
            previous.close()
        # The subscription key is currently not part of the connection string.
        self.connection_string = self.url + ":" + str(self.port) + "/"
        self.client = requests.session()
        # TLS certificate verification is switched off for this session.
        self.client.verify = False

    @staticmethod  # TODO: multiple dicts in list length
    def getJSON(response):
        """
        Decode the JSON body of a service response.

        :param response: object returned by ``requests``; it is tested for
            truthiness and must provide a ``json()`` method.
        :return: whatever ``response.json()`` returns.
        :raises TypeError: if ``response`` is falsy.
        """
        if not response:
            raise TypeError("no service output json")
        return response.json()

    def _post_for_json(self, connection_string, log, log_raw_body=False, **post_kwargs):
        """POST to ``connection_string`` and return the decoded JSON, always closing the response."""
        log.info('POST request to connection string: %s', connection_string)
        # NOTE(review): this uses the module-level requests.post, not self.client,
        # so the session settings (verify=False) do not apply here - confirm intent.
        response = requests.post(connection_string, **post_kwargs)
        try:
            if log_raw_body:
                log.info('Got response: %s', response.text)
            return self.getJSON(response)
        finally:
            response.close()

    def recognize_speech(self, filename, extension):
        """
        Recognize text from speech.

        The audio file ``<filename>.<extension>`` is converted with ffmpeg to
        ``tmp.wav`` and sent, together with the existing file ``tmp.json``, to
        the STT service. The STT answer is written to ``tmp1.json`` and sent to
        the punctuation service. Its answer is then sent to the NER service.
        All temporary files live in the current working directory.

        :param filename: name of audio file with person voice, without extension.
        :type filename: str
        :param extension: audio file extension. Supported extensions:
            * WAV
            * MP3
            * OGG
            * FLAC
        :type extension: str
        :return:
            * on success: the JSON decoded from the NER service response,
              returned as is.
            * on any failure: a dictionary ``{"message": <text>}`` where the
              text is the exception message followed by "; " and the
              exception class docstring.
        :rtype: dict
        """
        logger = logging.getLogger(__name__)

        stt_connection_string = self.url + ":8000" + "/recognize"
        sttp_connection_string = self.url + ":8001" + "/recognize"
        ner_connection_string = self.url + ":8002" + "/recognize"

        try:
            # Convert to an 8 kHz mono WAV, reading at most 300 seconds of input
            # and overwriting any previous tmp.wav. check=True turns a failed
            # conversion into an error instead of uploading a stale tmp.wav.
            subprocess.run(
                ["ffmpeg", '-y', '-t', '300', '-i', filename + '.' + extension,
                 '-ar', '8000', '-ac', '1', 'tmp.wav'],
                check=True,
            )

            # run STT
            with open('tmp.wav', 'rb') as wav_file, open('tmp.json', 'rb') as config_file:
                files = {'wav': ('tmp.wav', wav_file, 'audio/wave'),
                         'json': ('tmp.json', config_file, 'application/json')}
                stt_json = self._post_for_json(
                    stt_connection_string, logger, log_raw_body=True, files=files)

            with codecs.open('tmp1.json', mode='w', encoding='utf-8', errors='ignore') as stt_dump:
                json.dump(stt_json, fp=stt_dump, ensure_ascii=False)

            # run STT.PUNCT
            with open('tmp1.json', 'rb') as stt_file:
                files = {'json': ('tmp1.json', stt_file, 'application/json')}
                sttp_json = self._post_for_json(sttp_connection_string, logger, files=files)
            logger.info('Server response: %s', sttp_json)

            # run NER: it receives the punctuated sentences again under
            # 'stt.dictors', with each sentence text duplicated as 'text'.
            sttp_json['stt.dictors'] = copy.deepcopy(sttp_json['stt.punct'])
            for sentence in sttp_json['stt.dictors']:
                sentence['text'] = sentence['sent']
            ner_json = self._post_for_json(ner_connection_string, logger, json=sttp_json)
            logger.info('Server response: %s', ner_json)

            return ner_json
        except Exception as e:
            # Callers receive the error as a dictionary; keep the traceback in the log.
            logger.exception('Speech recognition failed')
            return {"message": str(e) + "; " + str(e.__doc__)}
132 | + | ... | ... |
Please
register
or
login
to post a comment