Commit db2a5362 authored by Baze Petrushev
Browse files

Don't regex match unicode if the passed text is not unicode

parent b4b01278
......@@ -181,7 +181,8 @@ def process_text(text, max_features=200, stopwords=None):
stopwords = STOPWORDS
d = {}
for word in re.findall(r"\w[\w']*", text, flags=re.UNICODE):
flags = re.UNICODE if type(text) is unicode else 0
for word in re.findall(r"\w[\w']*", text, flags=flags):
if word.isdigit():
continue
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment