Commit 05bb5624 authored by Baze Petrushev
Browse files

Parse words with unicode letters

parent a8fbf5bc
@@ -178,7 +178,7 @@ def process_text(text, max_features=200, stopwords=None):
stopwords = STOPWORDS
d = {}
-for word in re.findall(r"\w[\w']*", text):
+for word in re.findall(r"\w[\w']*", text, flags=re.UNICODE):
if word.isdigit():
continue
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment