Commit 07e17db6 authored by Andreas Mueller
Browse files

add "generate_from_text" and "generate_from_frequencies" functions for easier discoverability.

parent e9015fd5
...@@ -120,19 +120,43 @@ class WordCloud(object): ...@@ -120,19 +120,43 @@ class WordCloud(object):
max_font_size = height max_font_size = height
self.max_font_size = max_font_size self.max_font_size = max_font_size
def fit_words(self, words): def fit_words(self, frequencies):
"""Generate the positions for words. """Create a word_cloud from words and frequencies.
Alias to generate_from_frequencies.
Parameters Parameters
---------- ----------
words : array of tuples frequencies : array of tuples
A tuple contains the word and its frequency. A tuple contains the word and its frequency.
Returns Returns
------- -------
layout_ : list of tuples (string, int, (int, int), int, color)) self
Encodes the fitted word cloud. Encodes for each word the string, font
size, position, orientation and color. Notes
-----
Larger canvases will make the code significantly slower. If you need a large
word cloud, run this function with a lower canvas size, and draw it with a
larger scale.
In the current form it actually just uses the rank of the counts, i.e. the
relative differences don't matter. Play with setting the font_size in the
main loop for different styles.
"""
return self.generate_from_frequencies(frequencies)
def generate_from_frequencies(self, frequencies):
"""Create a word_cloud from words and frequencies.
Parameters
----------
frequencies : array of tuples
A tuple contains the word and its frequency.
Returns
-------
self
Notes Notes
----- -----
...@@ -149,9 +173,9 @@ class WordCloud(object): ...@@ -149,9 +173,9 @@ class WordCloud(object):
else: else:
random_state = Random() random_state = Random()
if len(words) <= 0: if len(frequencies) <= 0:
print("We need at least 1 word to plot a word cloud, got %d." print("We need at least 1 word to plot a word cloud, got %d."
% len(words)) % len(frequencies))
if self.mask is not None: if self.mask is not None:
width = self.mask.shape[1] width = self.mask.shape[1]
...@@ -171,7 +195,7 @@ class WordCloud(object): ...@@ -171,7 +195,7 @@ class WordCloud(object):
font_size = self.max_font_size font_size = self.max_font_size
# start drawing grey image # start drawing grey image
for word, count in words: for word, count in frequencies:
# alternative way to set the font size # alternative way to set the font size
if not self.ranks_only: if not self.ranks_only:
font_size = min(font_size, int(100 * np.log(count + 100))) font_size = min(font_size, int(100 * np.log(count + 100)))
...@@ -230,8 +254,8 @@ class WordCloud(object): ...@@ -230,8 +254,8 @@ class WordCloud(object):
integral[x:, y:] = partial_integral integral[x:, y:] = partial_integral
self.layout_ = list(zip(words, font_sizes, positions, orientations, colors)) self.layout_ = list(zip(frequencies, font_sizes, positions, orientations, colors))
return self.layout_ return self
def process_text(self, text): def process_text(self, text):
"""Splits a long text into words, eliminates the stopwords. """Splits a long text into words, eliminates the stopwords.
...@@ -254,8 +278,8 @@ class WordCloud(object): ...@@ -254,8 +278,8 @@ class WordCloud(object):
""" """
d = {} d = {}
flags = re.UNICODE if sys.version < '3' and \ flags = (re.UNICODE if sys.version < '3' and type(text) is unicode
type(text) is unicode else 0 else 0)
for word in re.findall(r"\w[\w']*", text, flags=flags): for word in re.findall(r"\w[\w']*", text, flags=flags):
if word.isdigit(): if word.isdigit():
continue continue
...@@ -300,7 +324,7 @@ class WordCloud(object): ...@@ -300,7 +324,7 @@ class WordCloud(object):
return words return words
def generate(self, text): def generate_from_text(self, text):
"""Generate wordcloud from text. """Generate wordcloud from text.
Calls process_text and fit_words. Calls process_text and fit_words.
...@@ -313,6 +337,19 @@ class WordCloud(object): ...@@ -313,6 +337,19 @@ class WordCloud(object):
self.fit_words(self.words_) self.fit_words(self.words_)
return self return self
def generate(self, text):
    """Build the word cloud from a piece of text.

    Convenience alias: the argument is handed unchanged to
    generate_from_text and its result is passed straight back.

    Parameters
    ----------
    text
        The input text, forwarded as-is to generate_from_text.

    Returns
    -------
    The value returned by generate_from_text (documented as self).
    """
    result = self.generate_from_text(text)
    return result
def _check_generated(self): def _check_generated(self):
"""Check if layout_ was computed, otherwise raise error.""" """Check if layout_ was computed, otherwise raise error."""
if not hasattr(self, "layout_"): if not hasattr(self, "layout_"):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment