stt_voice.py
4.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import base64
import requests
import json
import re
import datetime
import ast
import logging
import soundfile as sf
import codecs
import subprocess
import copy
# Route log records of DEBUG level and above into the stt_voice.log file,
# each line carrying timestamp, logger name, level and message.
logging.basicConfig(
    filename=u'stt_voice.log',
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

# Logger named after this module.
logger = logging.getLogger(__name__)
class Speech_Client:
    """Client for a speech-recognition pipeline reached over HTTP.

    ``recognize_speech`` talks to three services on fixed ports of ``url``:
    STT (8000), STT.PUNCT (8001) and NER (8002), each at ``/recognize``.
    """

    def __init__(self, url, port, subscription_key):
        """
        Initialize SDK client for simple work with API server using python.

        :param url: API server url. For example: "https://bigdata.nsu.ru"
        :param port: API server port. For example: "5044"
        :param subscription_key: your subscription key that you can get from our manager.
        :type url: str
        :type port: int
        :type subscription_key: str
        """
        self.url = url
        self.port = port
        self.subscription_key = subscription_key
        self._reset_connection()

    def change_subscription_key(self, subscription_key):
        """
        Store a new subscription key and rebuild the connection string and session.

        :param subscription_key: the new subscription key.
        :type subscription_key: str
        """
        self.subscription_key = subscription_key
        self._reset_connection()

    def _reset_connection(self):
        """Rebuild ``connection_string`` and create a fresh HTTP session."""
        # The subscription key is stored but is not part of the connection string.
        self.connection_string = self.url + ":" + str(self.port) + "/"
        self.client = requests.session()
        # Certificate verification is switched off for this session.
        # NOTE(review): recognize_speech posts through requests.post, not through
        # this session, so this setting does not affect those calls - confirm
        # which behavior is intended.
        self.client.verify = False

    @staticmethod  # TODO: multiple dicts in list length
    def getJSON(response):
        """
        Return the decoded JSON body of a service response.

        :param response: object with a ``json()`` method whose truth value
            tells whether the request succeeded (e.g. ``requests.Response``).
        :return: whatever ``response.json()`` returns.
        :raises TypeError: if ``response`` is falsy.
        """
        if not response:
            raise TypeError("no service output json")
        return response.json()

    def _json_and_close(self, response):
        """Decode ``response`` with getJSON and close it even if decoding fails."""
        try:
            return self.getJSON(response)
        finally:
            response.close()

    def recognize_speech(self, filename, extension):
        """
        Recognize text from speech.

        The audio is converted with ffmpeg to ``tmp.wav`` and sent through the
        STT, STT.PUNCT and NER services in turn. Intermediate files ``tmp.wav``
        and ``tmp1.json`` are written to the current working directory, and an
        existing ``tmp.json`` is read from it and uploaded with the audio.

        :param filename: name of audio file with person voice, without extension.
        :type filename: str
        :param extension: audio file extension. Supported extensions:
            * WAV
            * MP3
            * OGG
            * FLAC
        :type extension: str
        :return: on success, the JSON decoded from the NER service response.
            On any failure, a dictionary ``{"message": <error text>}``.
        :rtype: dict
        """
        logger = logging.getLogger(__name__)
        try:
            stt_connection_string = self.url + ":8000" + "/recognize"
            sttp_connection_string = self.url + ":8001" + "/recognize"
            ner_connection_string = self.url + ":8002" + "/recognize"

            # Convert the input to 8 kHz mono WAV, keeping at most 300 seconds.
            # check=True turns a failed conversion into an error, so a stale
            # tmp.wav left by an earlier call is never uploaded by mistake.
            subprocess.run(
                ["ffmpeg", '-y', '-t', '300', '-i', filename + '.' + extension,
                 '-ar', '8000', '-ac', '1', 'tmp.wav'],
                check=True,
            )

            # run STT
            with open('tmp.wav', 'rb') as wav_file, open('tmp.json', 'rb') as json_file:
                files = {'wav': ('tmp.wav', wav_file, 'audio/wave'),
                         'json': ('tmp.json', json_file, 'application/json')}
                logger.info('POST request to connection string: %s', stt_connection_string)
                r = requests.post(stt_connection_string, files=files)
            logger.info('Got response: %s', r.text)
            stt_json = self._json_and_close(r)

            # The STT output is written to disk and uploaded as a file to the
            # next service.
            with codecs.open('tmp1.json', mode='w', encoding='utf-8', errors='ignore') as f:
                json.dump(stt_json, fp=f, ensure_ascii=False)

            # run STT.PUNCT
            with open('tmp1.json', 'rb') as stt_file:
                files = {'json': ('tmp1.json', stt_file, 'application/json')}
                logger.info('POST request to connection string: %s', sttp_connection_string)
                r = requests.post(sttp_connection_string, files=files)
            sttp_json = self._json_and_close(r)
            logger.info('Server response: %s', sttp_json)

            # run NER
            logger.info('POST request to connection string: %s', ner_connection_string)
            # Send the NER service a copy of the punctuated sentences under
            # 'stt.dictors', with each item's 'sent' value duplicated as 'text'.
            sttp_json['stt.dictors'] = copy.deepcopy(sttp_json['stt.punct'])
            for s in sttp_json['stt.dictors']:
                s['text'] = s['sent']
            r = requests.post(ner_connection_string, json=sttp_json)
            ner_json = self._json_and_close(r)
            logger.info('Server response: %s', ner_json)
            return ner_json
        except Exception as e:
            # Failures are reported through the return value; the traceback is
            # logged so it is not lost.
            logger.exception('Speech recognition failed')
            return {"message": str(e) + "; " + str(e.__doc__)}