Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
keeleliin
word_cloud_est
Commits
a5341953
Commit
a5341953
authored
Aug 18, 2015
by
Andreas Mueller
Browse files
Merge pull request #91 from amueller/frequency_scaling
Frequency scaling
parents
ce367f29
813b3f24
Changes
2
Hide whitespace changes
Inline
Side-by-side
examples/simple.py
View file @
a5341953
...
...
@@ -17,4 +17,10 @@ wordcloud = WordCloud().generate(text)
# Open a plot of the generated image.
plt
.
imshow
(
wordcloud
)
plt
.
axis
(
"off"
)
# take relative word frequencies into account, lower max_font_size
wordcloud
=
WordCloud
(
max_font_size
=
40
,
relative_scaling
=
.
5
).
generate
(
text
)
plt
.
figure
()
plt
.
imshow
(
wordcloud
)
plt
.
axis
(
"off"
)
plt
.
show
()
wordcloud/wordcloud.py
View file @
a5341953
...
...
@@ -94,9 +94,6 @@ class WordCloud(object):
height : int (default=200)
Height of the canvas.
ranks_only : boolean (default=False)
Only use the rank of the words, not the actual counts.
prefer_horizontal : float (default=0.90)
The ratio of times to try horizontal fitting as opposed to vertical.
...
...
@@ -133,10 +130,17 @@ class WordCloud(object):
Maximum font size for the largest word. If None, height of the image is
used.
mode: string (default="RGB")
mode
: string (default="RGB")
Transparent background will be generated when mode is "RGBA" and
background_color is None.
relative_scaling : float (default=0)
Importance of relative word frequencies for font-size.
With relative_scaling=0, only word-ranks are considered.
With relative_scaling=1, a word that is twice as frequent will have twice the size.
If you want to consider the word frequencies and not only their rank, relative_scaling
around .5 often looks good.
Attributes
----------
``words_``: list of tuples (string, float)
...
...
@@ -157,10 +161,10 @@ class WordCloud(object):
"""
def
__init__
(
self
,
font_path
=
None
,
width
=
400
,
height
=
200
,
margin
=
2
,
ranks_only
=
Fals
e
,
prefer_horizontal
=
0.9
,
mask
=
None
,
scale
=
1
,
ranks_only
=
Non
e
,
prefer_horizontal
=
0.9
,
mask
=
None
,
scale
=
1
,
color_func
=
random_color_func
,
max_words
=
200
,
min_font_size
=
4
,
stopwords
=
None
,
random_state
=
None
,
background_color
=
'black'
,
max_font_size
=
None
,
font_step
=
1
,
mode
=
"RGB"
):
max_font_size
=
None
,
font_step
=
1
,
mode
=
"RGB"
,
relative_scaling
=
0
):
if
stopwords
is
None
:
stopwords
=
STOPWORDS
if
font_path
is
None
:
...
...
@@ -169,7 +173,6 @@ class WordCloud(object):
self
.
width
=
width
self
.
height
=
height
self
.
margin
=
margin
self
.
ranks_only
=
ranks_only
self
.
prefer_horizontal
=
prefer_horizontal
self
.
mask
=
mask
self
.
scale
=
scale
...
...
@@ -186,6 +189,13 @@ class WordCloud(object):
max_font_size
=
height
self
.
max_font_size
=
max_font_size
self
.
mode
=
mode
if
relative_scaling
<
0
or
relative_scaling
>
1
:
raise
ValueError
(
"relative_scaling needs to be between 0 and 1, got %f."
%
relative_scaling
)
self
.
relative_scaling
=
relative_scaling
if
ranks_only
is
not
None
:
warnings
.
warn
(
"ranks_only is deprecated and will be removed as"
" it had no effect. Look into relative_scaling."
,
DeprecationWarning
)
def
fit_words
(
self
,
frequencies
):
"""Create a word_cloud from words and frequencies.
...
...
@@ -262,12 +272,14 @@ class WordCloud(object):
font_sizes
,
positions
,
orientations
,
colors
=
[],
[],
[],
[]
font_size
=
self
.
max_font_size
last_freq
=
1.
# start drawing grey image
for
word
,
count
in
frequencies
:
# alternative way to set the font size
if
not
self
.
ranks_only
:
font_size
=
min
(
font_size
,
int
(
100
*
np
.
log
(
count
+
100
)))
for
word
,
freq
in
frequencies
:
# select the font size
rs
=
self
.
relative_scaling
if
rs
!=
0
:
font_size
=
int
(
round
((
rs
*
(
freq
/
float
(
last_freq
))
+
(
1
-
rs
))
*
font_size
))
while
True
:
# try to find a position
font
=
ImageFont
.
truetype
(
self
.
font_path
,
font_size
)
...
...
@@ -313,6 +325,7 @@ class WordCloud(object):
# recompute bottom right
# the order of the cumsum's is important for speed ?!
occupancy
.
update
(
img_array
,
x
,
y
)
last_freq
=
freq
self
.
layout_
=
list
(
zip
(
frequencies
,
font_sizes
,
positions
,
orientations
,
colors
))
return
self
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment