Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
keeleliin
word_cloud_est
Commits
425d58f9
Commit
425d58f9
authored
Feb 29, 2016
by
Andreas Mueller
Browse files
lower-case stopwords before using them.
parent
1f18b75f
Changes
2
Hide whitespace changes
Inline
Side-by-side
test/test_wordcloud.py
View file @
425d58f9
...
...
@@ -54,6 +54,14 @@ def test_default():
assert_equal
(
wc_array
.
shape
,
(
wc
.
height
,
wc
.
width
,
3
))
def test_stopwords_lowercasing():
    """Verify that a stopword given with a capital letter is still filtered."""
    cloud = WordCloud(stopwords=["Beautiful"])
    entries = cloud.process_text(THIS)

    # Only the first element of each entry is compared; presumably the
    # entries are (word, count) pairs -- confirm against process_text.
    seen = []
    for entry in entries:
        seen.append(entry[0])

    assert_true("Beautiful" not in seen)
def
test_writing_to_file
():
wc
=
WordCloud
()
wc
.
generate
(
THIS
)
...
...
wordcloud/wordcloud.py
View file @
425d58f9
...
...
@@ -79,6 +79,7 @@ def random_color_func(word=None, font_size=None, position=None,
random_state
=
Random
()
return
"hsl(%d, 80%%, 50%%)"
%
random_state
.
randint
(
0
,
255
)
def
get_single_color_func
(
color
):
"""Create a color function which returns a single hue and saturation with
different values (HSV). Accepted values are color strings as usable by PIL/Pillow.
...
...
@@ -88,7 +89,8 @@ def get_single_color_func(color):
"""
old_r
,
old_g
,
old_b
=
ImageColor
.
getrgb
(
color
)
rgb_max
=
255.
h
,
s
,
v
=
colorsys
.
rgb_to_hsv
(
old_r
/
rgb_max
,
old_g
/
rgb_max
,
old_b
/
rgb_max
)
h
,
s
,
v
=
colorsys
.
rgb_to_hsv
(
old_r
/
rgb_max
,
old_g
/
rgb_max
,
old_b
/
rgb_max
)
def
single_color_func
(
word
=
None
,
font_size
=
None
,
position
=
None
,
orientation
=
None
,
font_path
=
None
,
random_state
=
None
):
"""Random color generation.
...
...
@@ -265,7 +267,7 @@ class WordCloud(object):
# largest entry will be 1
max_frequency
=
float
(
frequencies
[
0
][
1
])
frequencies
=
[
(
word
,
freq
/
max_frequency
)
for
word
,
freq
in
frequencies
]
frequencies
=
[(
word
,
freq
/
max_frequency
)
for
word
,
freq
in
frequencies
]
self
.
words_
=
frequencies
...
...
@@ -380,6 +382,8 @@ class WordCloud(object):
include all those things.
"""
self
.
stopwords_lower_
=
[
word
.
lower
()
for
word
in
self
.
stopwords
]
d
=
{}
flags
=
(
re
.
UNICODE
if
sys
.
version
<
'3'
and
type
(
text
)
is
unicode
else
0
)
...
...
@@ -388,7 +392,7 @@ class WordCloud(object):
continue
word_lower
=
word
.
lower
()
if
word_lower
in
self
.
stopwords
:
if
word_lower
in
self
.
stopwords
_lower_
:
continue
# Look in lowercase dict.
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment