Commit 943ede2e authored by Andreas Mueller

Fix sorting and normalization: neither was applied if frequencies were given directly.

parent 1f63d4ad
...@@ -21,7 +21,8 @@ from .query_integral_image import query_integral_image ...@@ -21,7 +21,8 @@ from .query_integral_image import query_integral_image
item1 = itemgetter(1) item1 = itemgetter(1)
FONT_PATH = os.environ.get("FONT_PATH", os.path.join(os.path.dirname(__file__), "DroidSansMono.ttf")) FONT_PATH = os.environ.get("FONT_PATH", os.path.join(os.path.dirname(__file__),
"DroidSansMono.ttf"))
STOPWORDS = set([x.strip() for x in open(os.path.join(os.path.dirname(__file__), STOPWORDS = set([x.strip() for x in open(os.path.join(os.path.dirname(__file__),
'stopwords')).read().split('\n')]) 'stopwords')).read().split('\n')])
...@@ -215,6 +216,15 @@ class WordCloud(object): ...@@ -215,6 +216,15 @@ class WordCloud(object):
self self
""" """
# make sure frequencies are sorted and normalized
frequencies = sorted(frequencies, key=lambda x: x[1], reverse=True)
frequencies = frequencies[:self.max_words]
# largest entry will be 1
max_frequency = np.max([freq for word, freq in frequencies])
for i, (word, freq) in enumerate(frequencies):
frequencies[i] = word, freq / max_frequency
if self.random_state is not None: if self.random_state is not None:
random_state = self.random_state random_state = self.random_state
else: else:
...@@ -361,27 +371,21 @@ class WordCloud(object): ...@@ -361,27 +371,21 @@ class WordCloud(object):
d3[key_singular] = val_singular + val_plural d3[key_singular] = val_singular + val_plural
del d3[key] del d3[key]
words = sorted(d3.items(), key=item1, reverse=True) self.words_ = d3.items()
words = words[:self.max_words]
maximum = float(max(d3.values()))
for i, (word, count) in enumerate(words):
words[i] = word, count / maximum
self.words_ = words
return words return self.words_
def generate_from_text(self, text): def generate_from_text(self, text):
"""Generate wordcloud from text. """Generate wordcloud from text.
Calls process_text and fit_words. Calls process_text and generate_from_frequencies.
Returns Returns
------- -------
self self
""" """
self.process_text(text) self.process_text(text)
self.fit_words(self.words_) self.generate_from_frequencies(self.words_)
return self return self
def generate(self, text): def generate(self, text):
...@@ -389,7 +393,7 @@ class WordCloud(object): ...@@ -389,7 +393,7 @@ class WordCloud(object):
Alias to generate_from_text. Alias to generate_from_text.
Calls process_text and fit_words. Calls process_text and generate_from_frequencies.
Returns Returns
------- -------
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment