running comparison with refactor branch

This commit is contained in:
andrea 2021-01-29 14:56:20 +01:00
parent 5405f60bd0
commit b98821d3ff
1 changed file with 5 additions and 4 deletions

View File

@@ -29,8 +29,9 @@ def index(data, vocab, known_words, analyzer, unk_index, out_of_vocabulary):
unk_count = 0 unk_count = 0
knw_count = 0 knw_count = 0
out_count = 0 out_count = 0
pbar = tqdm(data, desc=f'indexing documents') # pbar = tqdm(data, desc=f'indexing documents')
for text in pbar: # for text in pbar:
for text in data:
words = analyzer(text) words = analyzer(text)
index = [] index = []
for word in words: for word in words:
@@ -48,8 +49,8 @@ def index(data, vocab, known_words, analyzer, unk_index, out_of_vocabulary):
index.append(idx) index.append(idx)
indexes.append(index) indexes.append(index)
knw_count += len(index) knw_count += len(index)
pbar.set_description(f'[unk = {unk_count}/{knw_count}={(100.*unk_count/knw_count):.2f}%]' # pbar.set_description(f'[unk = {unk_count}/{knw_count}={(100.*unk_count/knw_count):.2f}%]'
f'[out = {out_count}/{knw_count}={(100.*out_count/knw_count):.2f}%]') # f'[out = {out_count}/{knw_count}={(100.*out_count/knw_count):.2f}%]')
return indexes return indexes