Commit b4b01278 authored by Baze Petrushev
Browse files

Minor readability / performance / deprecation improvements

parent 37940a17
......@@ -9,12 +9,15 @@ import os
import sys
import re
import numpy as np
from operator import itemgetter
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
from query_integral_image import query_integral_image
# Key function returning the second element of a pair, e.g. the count in a
# (word, count) tuple; used as the ``key=`` argument for sorting / max.
item1 = itemgetter(1)

# Path to the Droid Sans Mono TrueType font.
# NOTE(review): presumably the fallback when a caller passes font_path=None;
# confirm against the functions that take ``font_path``.
FONT_PATH = "/usr/share/fonts/truetype/droid/DroidSansMono.ttf"

# Stop words, loaded once at import time from the 'stopwords' file located
# next to this module. The file content is split on newlines and each entry
# is stripped of surrounding whitespace. The ``with`` block closes the file
# handle deterministically instead of leaving it to the garbage collector.
with open(os.path.join(os.path.dirname(__file__),
                       'stopwords')) as _stopwords_file:
    STOPWORDS = set(x.strip() for x in _stopwords_file.read().split('\n'))
......@@ -133,7 +136,7 @@ def fit_words(words, font_path=None, width=400, height=200,
return zip(words, font_sizes, positions, orientations)
def random_color_func(word, font_size, position, orientation):
    """Return a random CSS-style HSL color string.

    The four parameters are accepted so the function matches the color
    callback signature, but none of them influences the result.

    Returns:
        A string of the form ``"hsl(H, 80%, 50%)"`` where ``H`` is a random
        integer between 0 and 255 inclusive.
    """
    # '%%' is a literal percent sign under %-formatting, so the whole color
    # spec lives in a single format string.
    return "hsl(%d, 80%%, 50%%)" % random.randint(0, 255)
def draw(elements, file_name, font_path=None, width=400, height=200, scale=1,
color_func=random_color_func):
......@@ -187,38 +190,35 @@ def process_text(text, max_features=200, stopwords=None):
continue
# Look in lowercase dict.
if d.has_key(word_lower):
if word_lower in d:
d2 = d[word_lower]
else:
d2 = {}
d[word_lower] = d2
# Look in any case dict.
if d2.has_key(word):
d2[word] += 1
else:
d2[word] = 1
d2[word] = d2.get(word, 0) + 1
d3 = {}
for d2 in d.values():
# Get the most popular case.
first = sorted(d2.iteritems(), key=lambda x: x[1], reverse=True)[0][0]
first = max(d2.iteritems(), key=item1)[0]
d3[first] = sum(d2.values())
# merge plurals into the singular count (simple cases only)
keys = set(d3.keys())
for key, val in d3.items():
for key in d3.keys():
if key.endswith('s'):
key_singular = key[:-1]
if key_singular in d3:
val_plural = d3[key]
val_singular = d3[key_singular]
d3[key_singular] = val_singular + val
d3[key_singular] = val_singular + val_plural
del d3[key]
words = sorted(d3.iteritems(), key=lambda x: x[1], reverse=True)
words = sorted(d3.iteritems(), key=item1, reverse=True)
words = words[:max_features]
maximum = float(max(d3.values()))
for i, (word, count) in enumerate(words):
words[i] = word, count/maximum
return words
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment