This commit is contained in:
Alejandro Moreo Fernandez 2020-01-09 10:50:35 +01:00
parent a8d76b6f52
commit e96968fc45
16 changed files with 11 additions and 0 deletions

View File

@ -0,0 +1,11 @@
from tqdm import tqdm
import re
import sys
def mask_numbers(data, number_mask='numbermask'):
    """Replace every numeric token in each text with a placeholder.

    A numeric token is matched as: a word boundary, one digit, then any
    run of digits, dots, commas or hyphens, ending at a word boundary
    (so "3.14", "1,000" and "2019-12-31" are each replaced as one token,
    while digits glued to preceding letters such as "abc123" are not).

    Args:
        data: Iterable of strings to process. It is wrapped in a tqdm
            progress bar labelled 'masking numbers' while being consumed.
        number_mask: Replacement inserted for each match. It is handed
            to the pattern's ``sub`` as the replacement template.

    Returns:
        A new list with the masked strings, in the same order as ``data``.
    """
    number_pattern = re.compile(r'\b[0-9][0-9.,-]*\b')
    progress = tqdm(data, desc='masking numbers')
    return [number_pattern.sub(number_mask, document) for document in progress]