running comparison with refactor branch
This commit is contained in:
parent
5405f60bd0
commit
b98821d3ff
|
@ -29,8 +29,9 @@ def index(data, vocab, known_words, analyzer, unk_index, out_of_vocabulary):
|
||||||
unk_count = 0
|
unk_count = 0
|
||||||
knw_count = 0
|
knw_count = 0
|
||||||
out_count = 0
|
out_count = 0
|
||||||
pbar = tqdm(data, desc=f'indexing documents')
|
# pbar = tqdm(data, desc=f'indexing documents')
|
||||||
for text in pbar:
|
# for text in pbar:
|
||||||
|
for text in data:
|
||||||
words = analyzer(text)
|
words = analyzer(text)
|
||||||
index = []
|
index = []
|
||||||
for word in words:
|
for word in words:
|
||||||
|
@ -48,8 +49,8 @@ def index(data, vocab, known_words, analyzer, unk_index, out_of_vocabulary):
|
||||||
index.append(idx)
|
index.append(idx)
|
||||||
indexes.append(index)
|
indexes.append(index)
|
||||||
knw_count += len(index)
|
knw_count += len(index)
|
||||||
pbar.set_description(f'[unk = {unk_count}/{knw_count}={(100.*unk_count/knw_count):.2f}%]'
|
# pbar.set_description(f'[unk = {unk_count}/{knw_count}={(100.*unk_count/knw_count):.2f}%]'
|
||||||
f'[out = {out_count}/{knw_count}={(100.*out_count/knw_count):.2f}%]')
|
# f'[out = {out_count}/{knw_count}={(100.*out_count/knw_count):.2f}%]')
|
||||||
return indexes
|
return indexes
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue