Commit 305ee5629be23959eddfb4b514f7a62905664138

Authored by Daniil Grebenkin
0 parents

uploading files

  1 +Metadata-Version: 1.1
  2 +Name: etaloncorpuscreator
  3 +Version: 0.1
  4 +Summary: command-line package for automatical creation of russian language audio corpus from YouTube audiotracks and subtitles with using forced alignment by sphinx3
  5 +Home-page: https://github.com/dangrebenkin/audiocorpusbuilder
  6 +Author: Daniel Grebenkin
  7 +Author-email: d.grebenkin@g.nsu.ru
  8 +License: Apache License Version 2.0
  9 +Description: UNKNOWN
  10 +Keywords: dataset,librosa,youtube-dl,youtube,forced alignment,sphinx,sphinx3
  11 +Platform: Linux
  12 +Classifier: Development Status :: 3 - Alpha
  13 +Classifier: Intended Audience :: Science/Research
  14 +Classifier: Intended Audience :: Developers
  15 +Classifier: Topic :: Software Development
  16 +Classifier: Topic :: Scientific/Engineering
  17 +Classifier: License :: OSI Approved :: Apache Software License
  18 +Classifier: Programming Language :: Python :: 3.6
  19 +Classifier: Programming Language :: Python :: 3.7
  20 +Classifier: Programming Language :: Python :: 3.8
... ...
  1 +# About
  2 +
  3 +The etaloncorpuscreator package automatically creates a Russian-language audio corpus from YouTube video playlists: it downloads each video's audio track and subtitles, builds "sound-text" pairs, performs forced alignment, and saves the new corpus and varieties (alignment scores).
  4 +
  5 +# Installing
  6 +
  7 +Installation requires Python 3.6 or later, a Linux OS, and sphinx3 on your local machine.
  8 +
  9 +# Start
  10 +
  11 +To run etaloncorpuscreator you should prepare directories for audiotracks, subtitles, and results. You also need to create a playlists.txt file with the playlist links, each link on a separate line.
  12 +
  13 +# Arguments
  14 +
  15 +All arguments are required for program use.
  16 +
  17 +1. -p URL_list
  18 +
  19 +Playlists txt-file path.
  20 +
  21 +2. -a directory_audio
  22 +
  23 +Path to download audiotracks.
  24 +
  25 +3. -s directory_subtitles
  26 +
  27 +Path to download subtitles.
  28 +
  29 +4. -r directory_results
  30 +
  31 +Path for audio results.
  32 +
  33 +5. -am sphinx_model_path
  34 +
  35 +Your acoustic model path.
  36 +
  37 +6. -dict dictionary_path
  38 +
  39 +Your dictionary path.
  40 +
  41 +7. -dict_f dictionary_filler_path
  42 +
  43 +Your dictionary filler path.
  44 +
  45 +8. -ar directory_alignment_results
  46 +
  47 +Path for alignment results.
  48 +
  49 +# Usage
  50 +
  51 +eccr [-p URL_list] [-a directory_audio] [-s directory_subtitles] [-r directory_results] [-am sphinx_model_path] [-dict dictionary_path] [-dict_f dictionary_filler_path] [-ar directory_alignment_results]
  52 +
  53 +# Example
  54 +
  55 +eccr -p playlists.txt -a Audio -s Subs -r Results -am ./voxforge_ru_sphinx/model_parameters/voxforge_ru.cd_cont_200 -dict ./voxforge_ru_sphinx/voxforge_ru.dic -dict_f ./voxforge_ru_sphinx/voxforge_ru.filler -ar Alignment
... ...
  1 +Metadata-Version: 1.1
  2 +Name: etaloncorpuscreator
  3 +Version: 0.1
  4 +Summary: command-line package for automatical creation of russian language audio corpus from YouTube audiotracks and subtitles with using forced alignment by sphinx3
  5 +Home-page: https://github.com/dangrebenkin/audiocorpusbuilder
  6 +Author: Daniel Grebenkin
  7 +Author-email: d.grebenkin@g.nsu.ru
  8 +License: Apache License Version 2.0
  9 +Description: UNKNOWN
  10 +Keywords: dataset,librosa,youtube-dl,youtube,forced alignment,sphinx,sphinx3
  11 +Platform: Linux
  12 +Classifier: Development Status :: 3 - Alpha
  13 +Classifier: Intended Audience :: Science/Research
  14 +Classifier: Intended Audience :: Developers
  15 +Classifier: Topic :: Software Development
  16 +Classifier: Topic :: Scientific/Engineering
  17 +Classifier: License :: OSI Approved :: Apache Software License
  18 +Classifier: Programming Language :: Python :: 3.6
  19 +Classifier: Programming Language :: Python :: 3.7
  20 +Classifier: Programming Language :: Python :: 3.8
... ...
  1 +README.md
  2 +setup.cfg
  3 +setup.py
  4 +etaloncorpuscreator/__init__.py
  5 +etaloncorpuscreator/__main__.py
  6 +etaloncorpuscreator/corpus_creator.py
  7 +etaloncorpuscreator.egg-info/PKG-INFO
  8 +etaloncorpuscreator.egg-info/SOURCES.txt
  9 +etaloncorpuscreator.egg-info/dependency_links.txt
  10 +etaloncorpuscreator.egg-info/entry_points.txt
  11 +etaloncorpuscreator.egg-info/requires.txt
  12 +etaloncorpuscreator.egg-info/top_level.txt
\ No newline at end of file
... ...
  1 +[console_scripts]
  2 +eccr = etaloncorpuscreator.corpus_creator:main
  3 +
... ...
  1 +pandas>=1.1.1
  2 +audioread>=2.0.0
  3 +numpy>=1.15.0
  4 +packaging>=18
  5 +scipy>=1.0.0
  6 +scikit-learn!=0.19.0,>=0.14.0
  7 +joblib>=0.14
  8 +decorator>=3.0.0
  9 +resampy>=0.2.2
  10 +numba==0.48
  11 +soundfile>=0.9.0
  12 +pooch>=1.0
  13 +librosa==0.7.0
  14 +youtube-dl>=2020.1.1
... ...
  1 +etaloncorpuscreator
... ...
# -*- coding: utf-8 -*-
"""Entry point used by ``python -m etaloncorpuscreator``."""

from .corpus_creator import main

# Guarded so that merely importing this module does not start the pipeline.
if __name__ == '__main__':
    main()
... ...
  1 +import os
  2 +import re
  3 +import shutil
  4 +import codecs
  5 +import pandas
  6 +import librosa
  7 +import argparse
  8 +import subprocess
  9 +from datetime import datetime
  10 +
  11 +# audiocorpusbuilder
  12 +
# Module-level state shared by the functions below through ``global``.

# Lines read from sphinx3_align's f_ali.out; flushed to results.txt and
# cleared by preparations().
results = []
# Names of the files found in the subtitles directory for the current playlist.
subtitles = []
# Names of the files found in the audio directory for the current playlist.
wavs = []
# Normalised cue texts of the subtitle file currently being cut.
subtitles_file = []
# Cue start / end times (seconds) of the subtitle file currently being cut.
startpoints =[]
finishpoints =[]
# Running number used for the <n>.wav / <n>.txt fragment names; it is only
# ever incremented, so fragment names stay unique across videos.
filenamecounter = 1
# Progress counter printed by wavdivision(); reset to 1 for every playlist.
counter = 1
# NOTE(review): not referenced anywhere in the visible code.
total_number = 0

# Columns of Total_results.csv, filled by preparations() and written by main().
wav_names = []
variaties = []
txt_names = []
  26 +
def getting_sound_and_subtitles(link, directory_audio, directory_subtitles,
                                directory_results, sphinx_model_path,
                                dictionary_path, dictionary_filler_path,
                                directory_results2):
    """Download one playlist, cut it into utterances and align them.

    Steps:
      1. List the playlist's videos with ``youtube-dl -j --flat-playlist``.
      2. For every video request Russian subtitles (uploaded ones first,
         auto-generated ones as a fallback).  The audio track is downloaded
         as WAV only if exactly one new file appeared in the subtitles
         directory.
      3. Cut every downloaded audio file with ``wavdivision``.
      4. Delete everything in the audio and subtitles directories.
      5. Collect the produced ``.wav`` / ``.txt`` fragments and hand them to
         ``preparations`` for forced alignment.

    Args:
        link: playlist URL.
        directory_audio: directory (with trailing separator) for audio files.
        directory_subtitles: directory (with trailing separator) for subtitles.
        directory_results: directory that receives one folder per audio file.
        sphinx_model_path: acoustic model directory, passed to ``preparations``.
        dictionary_path: pronunciation dictionary, passed to ``preparations``.
        dictionary_filler_path: filler dictionary, passed to ``preparations``.
        directory_results2: alignment output directory, passed to
            ``preparations``.
    """
    import json  # local import: only used to parse youtube-dl's JSON lines

    global subtitles, wavs, subtitles_file, startpoints, finishpoints, counter

    # The external tools are started with argument lists (no shell), so URLs
    # and paths containing quotes, spaces or shell metacharacters are passed
    # through verbatim instead of being interpreted by /bin/sh.
    listing = subprocess.run(
        ['youtube-dl', '-j', '--flat-playlist', '--', link],
        stdout=subprocess.PIPE, universal_newlines=True)

    # youtube-dl prints one JSON object per playlist entry; the video id is
    # extracted here instead of piping through jq/sed into a temporary file.
    video_urls = []
    for entry in listing.stdout.splitlines():
        try:
            video_id = json.loads(entry)['id']
        except (ValueError, KeyError, TypeError):
            continue
        video_urls.append('https://youtu.be/' + str(video_id))

    def run_youtube_dl(options, output_dir, url):
        # stdout is discarded, as the original os.popen(...).read() did.
        subprocess.run(
            ['youtube-dl', '-i'] + options
            + ['-o', output_dir + '%(title)s.%(ext)s', '--', url],
            stdout=subprocess.DEVNULL)

    for url in video_urls:
        amount_subs = len(os.listdir(directory_subtitles))
        # Uploaded subtitles first, auto-generated ones as a fallback.
        for sub_flag in ('--write-sub', '--write-auto-sub'):
            run_youtube_dl(
                ['--skip-download', sub_flag, '--sub-lang', 'ru'],
                directory_subtitles, url)
            if len(os.listdir(directory_subtitles)) == amount_subs + 1:
                run_youtube_dl(
                    ['--extract-audio', '--audio-format', 'wav'],
                    directory_audio, url)
                break

    subtitles.extend(os.listdir(directory_subtitles))
    wavs.extend(os.listdir(directory_audio))

    # Audio and subtitle files are paired by their position in sorted order.
    subtitles.sort()
    wavs.sort()
    counter_limit = len(wavs)

    for wav in wavs:
        wavdivision(wav, directory_audio, directory_results,
                    directory_subtitles, counter_limit)
        subtitles_file.clear()
        startpoints.clear()
        finishpoints.clear()

    # wavdivision creates one folder per audio file, named after that file.
    # Only these folders are collected below, so folders left in the results
    # directory by an earlier playlist are not aligned a second time.
    fragment_folders = [
        os.path.abspath(os.path.join(directory_results, name))
        for name in wavs
    ]

    wavs.clear()
    subtitles.clear()
    counter = 1

    for used_dir in (directory_audio, directory_subtitles):
        for name in os.listdir(used_dir):
            os.remove(os.path.join(used_dir, name))

    os.chdir(directory_results)

    fragment_wavs = []
    fragment_txts = []
    for folder in fragment_folders:
        if not os.path.isdir(folder):
            continue
        for name in sorted(os.listdir(folder)):
            # Decide by the file extension only: testing the full path for
            # 'wav'/'txt' misclassifies files whenever a directory name
            # happens to contain one of these substrings.
            if name.endswith('.wav'):
                fragment_wavs.append(os.path.join(folder, name))
            elif name.endswith('.txt'):
                fragment_txts.append(os.path.join(folder, name))

    preparations(sphinx_model_path, fragment_wavs, fragment_txts,
                 dictionary_path, dictionary_filler_path, directory_results2)
  108 +
  109 +
def _timestamp_to_seconds(stamp):
    """Convert an ``HH:MM:SS?mmm`` timestamp into seconds (float)."""
    hours = int(stamp[0:2])
    minutes = int(stamp[3:5])
    seconds = int(stamp[6:8])
    millis = int(stamp[9:12])
    # Integer milliseconds divided once: the same value timedelta
    # .total_seconds() yields, without the 24-hour limit of datetime().
    return (hours * 3600000 + minutes * 60000 + seconds * 1000 + millis) / 1000


def subtitlesdivision(file, directory_subtitles):
    """Parse one subtitle file into cue texts and cue boundaries.

    For every line that contains a ``start --> end`` timing, the (up to) two
    following lines are taken as the cue text, lower-cased and reduced to the
    tokens matched by the word pattern, then appended to the global
    ``subtitles_file``.  The cue's start and end, in seconds, are appended to
    the globals ``startpoints`` and ``finishpoints``.

    Args:
        file: subtitle file name.
        directory_subtitles: directory prefix (with trailing separator) that
            is concatenated with ``file``.
    """
    global subtitles_file, startpoints, finishpoints

    # The character before the milliseconds is matched with '.', as in the
    # original pattern, so any single separator character is accepted.
    timing_pattern = re.compile(
        r'(\d{2}:\d{2}:\d{2}.\d{3}) --> (\d{2}:\d{2}:\d{2}.\d{3})')
    word_pattern = re.compile(r'([А-я]\w+|[а-я]|[0-9]\d+)')

    with open(directory_subtitles + file, 'r', encoding='utf-8') as subtitles2:
        lines = subtitles2.readlines()

    # enumerate() gives the real position of every timing line; looking it up
    # with list.index() returns the first equal line, which is wrong for
    # repeated timings and quadratic in the file length.
    for position, line in enumerate(lines):
        timings = timing_pattern.findall(line)
        if not timings:
            continue

        # Slicing tolerates a cue at the very end of the file, where fewer
        # than two lines follow the timing line.
        cue_text = ''.join(lines[position + 1:position + 3]).lower()
        subtitles_file.append(' '.join(word_pattern.findall(cue_text)))

        for start, finish in timings:
            startpoints.append(_timestamp_to_seconds(start))
            finishpoints.append(_timestamp_to_seconds(finish))
  168 +
def wavdivision(sound, directory_audio, directory_results, directory_subtitles, counter_limit):
    """Cut one audio file into per-cue fragments with matching transcripts.

    The first remaining entry of the global ``subtitles`` list is parsed with
    ``subtitlesdivision``; then, for every (start, finish) pair, a
    ``<n>.wav`` fragment and a ``<n>.txt`` transcript are written into a new
    folder named after ``sound`` inside ``directory_results``.  ``<n>`` is
    the global ``filenamecounter``, which is incremented after each fragment.

    Args:
        sound: audio file name inside ``directory_audio``.
        directory_audio: directory prefix (with trailing separator).
        directory_results: directory that receives the fragment folder.
        directory_subtitles: directory prefix passed to ``subtitlesdivision``.
        counter_limit: total number of audio files, used in the progress line.

    Raises:
        FileExistsError: if the fragment folder already exists.
    """
    global subtitles, filenamecounter, subtitles_file, startpoints, finishpoints, counter

    # Audio and subtitle files are paired by position: consume the first
    # subtitle file still waiting in the list.
    if subtitles:
        subtitlesdivision(subtitles.pop(0), directory_subtitles)

    y, sr = librosa.load(directory_audio + sound, mono=True)

    # Explicit paths instead of os.chdir(): nothing here depends on, or
    # changes, the process working directory.
    fragment_dir = os.path.join(directory_results, str(sound))
    os.mkdir(fragment_dir)

    for start, finish in zip(startpoints, finishpoints):
        # Convert to sample indices *before* truncating to int; truncating
        # the seconds first would snap every cue to whole-second boundaries.
        fragment = y[int(start * sr):int(finish * sr)]
        fragment_base = os.path.join(fragment_dir, str(filenamecounter))
        librosa.output.write_wav(fragment_base + '.wav', fragment, sr)

        if subtitles_file:
            # Written as UTF-8 because preparations() reads it back as UTF-8.
            with open(fragment_base + '.txt', 'w', encoding='utf-8') as gh:
                gh.write(subtitles_file.pop(0))

        filenamecounter += 1

    print(counter, ' from ', counter_limit)
    counter += 1
  200 +
  201 +
  202 +# sphinxforcealigner
  203 +
def preparations(sphinx_model_path, wavs, txts, d_path, d_f_path, directory_results2):
    """Force-align utterances with sphinx3, 100 files at a time.

    For every batch the function
      * copies the transcripts and audio files into ``txt/`` and ``wav/``
        inside ``<directory_results2>/f_ali``,
      * writes ``f_ali.transcription`` and ``f_ali.fileids``,
      * converts the audio to 16 kHz / 16 bit / mono with ``sox``,
      * runs ``sphinx_fe`` and ``sphinx3_align``,
      * appends the lines of ``f_ali.out`` to the global ``results``,
      * for every file in ``phsegdir`` copies the matching wav/txt into the
        working directory and records their paths and the "Total score"
        value in the globals ``wav_names``, ``txt_names`` and ``variaties``,
      * removes the temporary directories and control files.

    Finally the collected ``results`` lines are appended to
    ``<directory_results2>/results.txt`` and ``results`` is cleared.

    Args:
        sphinx_model_path: acoustic model directory (contains feat.params).
        wavs: paths of the audio fragments.
        txts: paths of the transcripts, in the same order as ``wavs``.
        d_path: pronunciation dictionary path.
        d_f_path: filler dictionary path.
        directory_results2: directory that receives ``f_ali`` and results.txt.
    """
    global results, wav_names, variaties, txt_names

    batch_size = 100

    work_dir = os.path.join(directory_results2, "f_ali")
    if not os.path.exists(work_dir):
        os.mkdir(work_dir)
    os.chdir(work_dir)

    txt_dir = os.path.join(work_dir, 'txt')
    wav_dir = os.path.join(work_dir, 'wav')
    phseg_dir = os.path.join(work_dir, 'phsegdir')
    transcription_path = os.path.join(work_dir, 'f_ali.transcription')
    fileids_path = os.path.join(work_dir, 'f_ali.fileids')
    result_path = os.path.join(work_dir, 'f_ali.out')

    # 'export LD_LIBRARY_PATH=...' and 'cd ...' executed through separate
    # os.popen() shells never reached the sphinx processes.  The library path
    # and working directory are now handed to every tool explicitly.
    tool_env = dict(os.environ)
    inherited_path = tool_env.get('LD_LIBRARY_PATH')
    if inherited_path:
        tool_env['LD_LIBRARY_PATH'] = '/usr/local/lib' + os.pathsep + inherited_path
    else:
        tool_env['LD_LIBRARY_PATH'] = '/usr/local/lib'

    def run_tool(arguments):
        # Argument lists (no shell) keep paths with spaces or quotes intact;
        # stdout is discarded as the original os.popen(...).read() did.
        subprocess.run(arguments, cwd=work_dir, env=tool_env,
                       stdout=subprocess.DEVNULL)

    while wavs and txts:
        slice_audios = wavs[:batch_size]
        slice_annotations = txts[:batch_size]

        os.mkdir(txt_dir)
        os.mkdir(wav_dir)
        os.mkdir(phseg_dir)

        # preparing .transcription
        with codecs.open(transcription_path, 'w', encoding="utf8",
                         errors='ignore') as text_file:
            for annotation_path in slice_annotations:
                shutil.copy(annotation_path, txt_dir)
                with codecs.open(annotation_path, encoding="utf8",
                                 errors='ignore') as annotation:
                    real_string = annotation.read().replace('\n', '')
                utterance_id = os.path.splitext(
                    os.path.basename(annotation_path))[0]
                text_file.write(
                    '<s> ' + real_string + ' </s> (' + utterance_id + ')\n')

        # preparing .fileids
        for audio_path in slice_audios:
            shutil.copy(audio_path, wav_dir)

        # conversion to the format expected by the acoustic model
        temporary_audio = os.path.join(wav_dir, 'temporary_audio_wav.wav')
        for name in os.listdir(wav_dir):
            copied_audio = os.path.join(wav_dir, name)
            run_tool(['sox', copied_audio, '-r', '16000', '-b', '16',
                      '-c', '1', temporary_audio])
            os.rename(temporary_audio, copied_audio)

        with open(fileids_path, 'w') as wav_scp_file:
            for audio_path in slice_audios:
                stem = os.path.splitext(os.path.basename(audio_path))[0]
                wav_scp_file.write('wav/' + stem + '\n')

        # preparing features and doing alignment
        run_tool(['sphinx_fe',
                  '-argfile', sphinx_model_path + '/feat.params',
                  '-samprate', '16000',
                  '-c', 'f_ali.fileids',
                  '-di', '.', '-do', '.',
                  '-ei', 'wav', '-eo', 'mfc',
                  '-mswav', 'yes'])
        run_tool(['sphinx3_align',
                  '-hmm', sphinx_model_path,
                  '-dict', d_path,
                  '-fdict', d_f_path,
                  '-ctl', fileids_path,
                  '-cepdir', '.', '-cepext', '.mfc',
                  '-insent', transcription_path,
                  '-outsent', result_path,
                  '-phsegdir', phseg_dir])

        # getting results
        with codecs.open(result_path, 'r', encoding="utf8",
                         errors='ignore') as result_file:
            results.extend(result_file.readlines())
        os.remove(result_path)

        # Plain slicing keeps the two lists parallel for the next batch.
        wavs = wavs[batch_size:]
        txts = txts[batch_size:]

        for phseg_name in os.listdir(phseg_dir):
            stem = os.path.splitext(phseg_name)[0]

            shutil.copy(os.path.join(wav_dir, stem + '.wav'), work_dir)
            shutil.copy(os.path.join(txt_dir, stem + '.txt'), work_dir)

            # Built from the file name only; deleting 'wav/' or 'txt/' from
            # the full path would corrupt any path containing that substring
            # elsewhere.
            wav_names.append(os.path.join(work_dir, stem + '.wav'))
            txt_names.append(os.path.join(work_dir, stem + '.txt'))

            # Exactly one score entry per aligned file keeps the three result
            # lists the same length, which the final DataFrame requires.
            score = None
            with codecs.open(os.path.join(phseg_dir, phseg_name), 'r',
                             encoding='utf-8', errors='ignore') as phseg_file:
                for line in phseg_file:
                    if 'Total score:' in line:
                        digits = re.findall(r'\d+', line)
                        if digits:
                            score = '-' + digits[0]
                            break
            variaties.append(score)

        # removing files and directories for new files
        shutil.rmtree(txt_dir)
        shutil.rmtree(wav_dir)
        shutil.rmtree(phseg_dir)
        os.remove(transcription_path)
        os.remove(fileids_path)

    total_result = os.path.join(directory_results2, 'results.txt')
    with codecs.open(total_result, 'a', encoding="utf8",
                     errors='ignore') as result_total:
        result_total.writelines(results)
    results.clear()
  342 +
  343 +# arguments parser
  344 +
def main():
    """Command-line entry point: process every playlist and write the CSV.

    Parses the eight required arguments, runs
    ``getting_sound_and_subtitles`` for every non-empty line of the playlist
    file and finally writes ``Total_results.csv`` (columns ``wav_dir``,
    ``txt_dir``, ``variaty``) into the alignment results directory.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('-p', '--playlist_file', dest='URL_list', type=str,
                        help='playlists txt-file path', required=True)
    parser.add_argument('-a', '--audio_path', dest='directory_audio', type=str,
                        help='path to download audiotracks', required=True)
    parser.add_argument('-s', '--subs_path', dest='directory_subtitles', type=str,
                        help='path to download subtitles', required=True)
    parser.add_argument('-r', '--results_path', dest='directory_results', type=str,
                        help='path for results', required=True)

    parser.add_argument('-am', '--sphinx_model_path', dest='sphinx_model_path', type=str,
                        help='your acoustic model path', required=True)
    parser.add_argument('-dict', '--dictionary_path', dest='dictionary_path', type=str,
                        help='your dictionary path', required=True)
    parser.add_argument('-dict_f', '--dictionary_filler_path', dest='dictionary_filler_path', type=str,
                        help='your dictionary filler path', required=True)
    parser.add_argument('-ar', '--ali_results_path', dest='directory_alignment_results', type=str,
                        help='path for alignment results', required=True)

    args = parser.parse_args()

    # The download/cut helpers build paths by plain concatenation, so these
    # three directories carry a trailing separator.
    directory_audio = os.path.abspath(args.directory_audio) + '/'
    directory_subtitles = os.path.abspath(args.directory_subtitles) + '/'
    directory_results = os.path.abspath(args.directory_results) + '/'
    URL_list = os.path.abspath(args.URL_list)
    sphinx_model_path = os.path.abspath(args.sphinx_model_path)
    dictionary_path = os.path.abspath(args.dictionary_path)
    dictionary_filler_path = os.path.abspath(args.dictionary_filler_path)
    directory_results2 = os.path.abspath(args.directory_alignment_results)

    with open(URL_list, 'r') as playlists_links:
        # strip() also removes '\r' and stray spaces, which would otherwise
        # be passed on to the downloader as part of the URL.
        playlist_urls = [line.strip() for line in playlists_links]

    for playlist_url in playlist_urls:
        if not playlist_url:
            continue
        getting_sound_and_subtitles(playlist_url, directory_audio,
                                    directory_subtitles, directory_results,
                                    sphinx_model_path, dictionary_path,
                                    dictionary_filler_path, directory_results2)
        wavs.clear()

    # creating total csv
    os.chdir(directory_results2)

    totals = {'wav_dir': wav_names, 'txt_dir': txt_names, 'variaty': variaties}
    pandas.DataFrame(totals).to_csv('Total_results.csv', index=False, header=True)
  397 +
  398 +
... ...
  1 +[metadata]
  2 +description-file = README.md
  3 +
  4 +[egg_info]
  5 +tag_build =
  6 +tag_date = 0
  7 +
... ...
  1 +from setuptools import setup, find_packages, Extension
  2 +from os.path import join, dirname
  3 +
# Ship the README as the long description; without it the generated PKG-INFO
# only contains "Description: UNKNOWN".
with open(join(dirname(__file__), 'README.md'), encoding='utf-8') as readme_file:
    long_description = readme_file.read()

setup(
    name='etaloncorpuscreator',
    version='0.1',
    description='command-line package for automatical creation of russian language audio corpus from YouTube audiotracks and subtitles with using forced alignment by sphinx3',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/dangrebenkin/audiocorpusbuilder',
    author='Daniel Grebenkin',
    author_email='d.grebenkin@g.nsu.ru',
    license='Apache License Version 2.0',
    keywords=['dataset', 'librosa', 'youtube-dl', 'youtube', 'forced alignment', 'sphinx', 'sphinx3'],
    packages=find_packages(),
    platforms='Linux',
    # Matches the README requirement and the version classifiers below.
    python_requires='>=3.6',
    entry_points={
        'console_scripts': [
            'eccr = etaloncorpuscreator.corpus_creator:main'
        ]
    },
    install_requires=[
        'pandas >= 1.1.1',
        'audioread >= 2.0.0',
        'numpy >= 1.15.0',
        'packaging >= 18',
        'scipy >= 1.0.0',
        'scikit-learn >= 0.14.0, != 0.19.0',
        'joblib >= 0.14',
        'decorator >= 3.0.0',
        'resampy >= 0.2.2',
        'numba == 0.48',
        'soundfile >= 0.9.0',
        'pooch >= 1.0',
        'librosa==0.7.0',
        'youtube-dl>=2020.1.1'
    ],
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Science/Research',
        'Intended Audience :: Developers',
        'Topic :: Software Development',
        'Topic :: Scientific/Engineering',
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8']
)
  47 +
... ...
Please register or login to post a comment