Commit a5341953 authored by Andreas Mueller
Browse files

Merge pull request #91 from amueller/frequency_scaling

Frequency scaling
parents ce367f29 813b3f24
...@@ -17,4 +17,10 @@ wordcloud = WordCloud().generate(text) ...@@ -17,4 +17,10 @@ wordcloud = WordCloud().generate(text)
# Open a plot of the generated image. # Open a plot of the generated image.
plt.imshow(wordcloud) plt.imshow(wordcloud)
plt.axis("off") plt.axis("off")
# take relative word frequencies into account, lower max_font_size
wordcloud = WordCloud(max_font_size=40, relative_scaling=.5).generate(text)
plt.figure()
plt.imshow(wordcloud)
plt.axis("off")
plt.show() plt.show()
...@@ -94,9 +94,6 @@ class WordCloud(object): ...@@ -94,9 +94,6 @@ class WordCloud(object):
height : int (default=200) height : int (default=200)
Height of the canvas. Height of the canvas.
ranks_only : boolean (default=False)
Only use the rank of the words, not the actual counts.
prefer_horizontal : float (default=0.90) prefer_horizontal : float (default=0.90)
The ratio of times to try horizontal fitting as opposed to vertical. The ratio of times to try horizontal fitting as opposed to vertical.
...@@ -133,10 +130,17 @@ class WordCloud(object): ...@@ -133,10 +130,17 @@ class WordCloud(object):
Maximum font size for the largest word. If None, height of the image is Maximum font size for the largest word. If None, height of the image is
used. used.
mode: string (default="RGB") mode : string (default="RGB")
Transparent background will be generated when mode is "RGBA" and Transparent background will be generated when mode is "RGBA" and
background_color is None. background_color is None.
relative_scaling : float (default=0)
Importance of relative word frequencies for font-size.
With relative_scaling=0, only word-ranks are considered.
With relative_scaling=1, a word that is twice as frequent will have twice the size.
If you want to consider the word frequencies and not only their rank, relative_scaling
around .5 often looks good.
Attributes Attributes
---------- ----------
``words_``: list of tuples (string, float) ``words_``: list of tuples (string, float)
...@@ -157,10 +161,10 @@ class WordCloud(object): ...@@ -157,10 +161,10 @@ class WordCloud(object):
""" """
def __init__(self, font_path=None, width=400, height=200, margin=2, def __init__(self, font_path=None, width=400, height=200, margin=2,
ranks_only=False, prefer_horizontal=0.9, mask=None, scale=1, ranks_only=None, prefer_horizontal=0.9, mask=None, scale=1,
color_func=random_color_func, max_words=200, min_font_size=4, color_func=random_color_func, max_words=200, min_font_size=4,
stopwords=None, random_state=None, background_color='black', stopwords=None, random_state=None, background_color='black',
max_font_size=None, font_step=1, mode="RGB"): max_font_size=None, font_step=1, mode="RGB", relative_scaling=0):
if stopwords is None: if stopwords is None:
stopwords = STOPWORDS stopwords = STOPWORDS
if font_path is None: if font_path is None:
...@@ -169,7 +173,6 @@ class WordCloud(object): ...@@ -169,7 +173,6 @@ class WordCloud(object):
self.width = width self.width = width
self.height = height self.height = height
self.margin = margin self.margin = margin
self.ranks_only = ranks_only
self.prefer_horizontal = prefer_horizontal self.prefer_horizontal = prefer_horizontal
self.mask = mask self.mask = mask
self.scale = scale self.scale = scale
...@@ -186,6 +189,13 @@ class WordCloud(object): ...@@ -186,6 +189,13 @@ class WordCloud(object):
max_font_size = height max_font_size = height
self.max_font_size = max_font_size self.max_font_size = max_font_size
self.mode = mode self.mode = mode
if relative_scaling < 0 or relative_scaling > 1:
raise ValueError("relative_scaling needs to be between 0 and 1, got %f."
% relative_scaling)
self.relative_scaling = relative_scaling
if ranks_only is not None:
warnings.warn("ranks_only is deprecated and will be removed as"
" it had no effect. Look into relative_scaling.", DeprecationWarning)
def fit_words(self, frequencies): def fit_words(self, frequencies):
"""Create a word_cloud from words and frequencies. """Create a word_cloud from words and frequencies.
...@@ -262,12 +272,14 @@ class WordCloud(object): ...@@ -262,12 +272,14 @@ class WordCloud(object):
font_sizes, positions, orientations, colors = [], [], [], [] font_sizes, positions, orientations, colors = [], [], [], []
font_size = self.max_font_size font_size = self.max_font_size
last_freq = 1.
# start drawing grey image # start drawing grey image
for word, count in frequencies: for word, freq in frequencies:
# alternative way to set the font size # select the font size
if not self.ranks_only: rs = self.relative_scaling
font_size = min(font_size, int(100 * np.log(count + 100))) if rs != 0:
font_size = int(round((rs * (freq / float(last_freq)) + (1 - rs)) * font_size))
while True: while True:
# try to find a position # try to find a position
font = ImageFont.truetype(self.font_path, font_size) font = ImageFont.truetype(self.font_path, font_size)
...@@ -313,6 +325,7 @@ class WordCloud(object): ...@@ -313,6 +325,7 @@ class WordCloud(object):
# recompute bottom right # recompute bottom right
# the order of the cumsum's is important for speed ?! # the order of the cumsum's is important for speed ?!
occupancy.update(img_array, x, y) occupancy.update(img_array, x, y)
last_freq = freq
self.layout_ = list(zip(frequencies, font_sizes, positions, orientations, colors)) self.layout_ = list(zip(frequencies, font_sizes, positions, orientations, colors))
return self return self
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment