Showing
1 changed file
with
132 additions
and
0 deletions
stt_voice.py
0 → 100644
1 | +import base64 | ||
2 | + | ||
3 | +import requests | ||
4 | +import json | ||
5 | +import re | ||
6 | +import datetime | ||
7 | +import ast | ||
8 | + | ||
9 | +import logging | ||
10 | + | ||
11 | +import soundfile as sf | ||
12 | +import codecs | ||
13 | +import subprocess | ||
14 | +import copy | ||
15 | + | ||
# Enable logging: ask the root logger to write DEBUG-and-above records to
# the file stt_voice.log, one timestamped line per record.
logging.basicConfig(
    filename=u'stt_voice.log',
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
23 | + | ||
class Speech_Client:
    """
    Client for a speech-recognition HTTP API.

    ``recognize_speech`` chains three services that live on the same host:
    STT (port 8000), STT.PUNCT (port 8001) and NER (port 8002).
    """

    def __init__(self, url, port, subscription_key):
        """
        Initialize SDK client for simple work with API server using python.

        :param url: API server url. For example: "https://bigdata.nsu.ru"
        :param port: API server port. For example: "5044"
        :param subscription_key: your subscription key that you can get from our manager.

        :type url: str
        :type port: int
        :type subscription_key: str
        """
        self.url = url
        self.port = port
        self.subscription_key = subscription_key
        self._reset_connection()

    def _reset_connection(self):
        """Rebuild ``connection_string`` and replace ``client`` with a fresh session."""
        # Close the previous session (if any) so its pooled connections are
        # released instead of being leaked when the attribute is overwritten.
        previous = getattr(self, "client", None)
        if previous is not None:
            previous.close()

        # The subscription key is stored but is not part of the URL.
        self.connection_string = self.url + ":" + str(self.port) + "/"
        self.client = requests.session()
        # TLS certificate verification is switched off for this session.
        # NOTE(review): recognize_speech() posts through the module-level
        # requests.post(), not through this session, so this setting does not
        # apply to those calls - confirm which behaviour is intended.
        self.client.verify = False

    def change_subscription_key(self, subscription_key):
        """
        Store a new subscription key and start a fresh HTTP session.

        :param subscription_key: the new subscription key.
        :type subscription_key: str
        """
        self.subscription_key = subscription_key
        self._reset_connection()

    @staticmethod  # TODO: multiple dicts in list length
    def getJSON(response):
        """
        Return the decoded JSON body of ``response``.

        :param response: object with a ``json()`` method. For a
            ``requests.Response`` truthiness is ``response.ok``, so an
            HTTP error status counts as "no output".
        :return: whatever ``response.json()`` returns.
        :raises TypeError: if ``response`` is falsy.
        """
        if not response:
            # Kept as TypeError for backward compatibility (the original
            # author wondered whether ValueError would fit better).
            raise TypeError("no service output json")
        return response.json()

    def _post_for_json(self, connection_string, log_body=False, **kwargs):
        """POST to ``connection_string`` and return the decoded JSON reply.

        The response is closed even when decoding fails. With ``log_body``
        the raw response text is logged before decoding.
        """
        log = logging.getLogger(__name__)
        log.info('POST request to connection string: %s', connection_string)
        r = requests.post(connection_string, **kwargs)
        try:
            if log_body:
                log.info('Got response: %s', r.text)
            return self.getJSON(r)
        finally:
            r.close()

    def recognize_speech(self, filename, extension):
        """
        Recognize text from speech.

        The audio file ``filename + '.' + extension`` is converted with
        ffmpeg to ``tmp.wav`` (8 kHz, mono, at most the first 300 seconds),
        sent to the STT service together with the existing file ``tmp.json``,
        the STT output is saved to ``tmp1.json`` and sent to STT.PUNCT, and
        the punctuated result is finally sent to the NER service. All
        temporary files live in the current working directory.

        :param filename: name of audio file with person voice, without extension.
        :type filename: str
        :param extension: audio file extension. Supported extensions:
            * WAV
            * MP3
            * OGG
            * FLAC
        :type extension: str
        :return:
            * on success: the JSON object returned by the NER service.
            * on any error: ``{"message": "<error text>; <exception doc>"}``.
        :rtype: dict
        """
        logger = logging.getLogger(__name__)

        try:
            # NOTE(review): the service ports are hard-coded; self.port is not used here.
            stt_connection_string = self.url + ":8000" + "/recognize"
            sttp_connection_string = self.url + ":8001" + "/recognize"
            ner_connection_string = self.url + ":8002" + "/recognize"

            # transform to 8khz mono WAV; check=True so that a failed
            # conversion is reported instead of silently uploading a stale
            # tmp.wav left over from an earlier call.
            subprocess.run(
                ["ffmpeg", '-y', '-t', '300', '-i', filename + '.' + extension,
                 '-ar', '8000', '-ac', '1', 'tmp.wav'],
                check=True,
            )

            # run STT
            with open('tmp.wav', 'rb') as wav_file, open('tmp.json', 'rb') as json_file:
                files = {'wav': ('tmp.wav', wav_file, 'audio/wave'),
                         'json': ('tmp.json', json_file, 'application/json')}
                stt_json = self._post_for_json(stt_connection_string,
                                               log_body=True, files=files)

            with codecs.open('tmp1.json', mode='w', encoding='utf-8', errors='ignore') as out:
                json.dump(stt_json, fp=out, ensure_ascii=False)

            # run STT.PUNCT
            with open('tmp1.json', 'rb') as stt_file:
                files = {'json': ('tmp1.json', stt_file, 'application/json')}
                sttp_json = self._post_for_json(sttp_connection_string, files=files)

            logger.info('Server response: %s', sttp_json)

            # run NER: the request carries a copy of the punctuated sentences
            # under 'stt.dictors', each with its 'sent' value duplicated as 'text'.
            sttp_json['stt.dictors'] = copy.deepcopy(sttp_json['stt.punct'])
            for s in sttp_json['stt.dictors']:
                s['text'] = s['sent']
            ner_json = self._post_for_json(ner_connection_string, json=sttp_json)

            logger.info('Server response: %s', ner_json)

            return ner_json
        except Exception as e:
            # Callers receive the error as a dict rather than an exception;
            # keep that contract but record the traceback in the log.
            logger.exception('Speech recognition failed')
            return {"message": str(e) + "; " + str(e.__doc__)}
132 | + |
Please
register
or
login
to post a comment