Commit 0869b1f2 authored by Andreas Mueller

Merge pull request #129 from amueller/stopwords_lower

lower-case stopwords before using them.
parents 1f18b75f 425d58f9
...@@ -54,6 +54,14 @@ def test_default(): ...@@ -54,6 +54,14 @@ def test_default():
assert_equal(wc_array.shape, (wc.height, wc.width, 3)) assert_equal(wc_array.shape, (wc.height, wc.width, 3))
def test_stopwords_lowercasing():
    """Check that a stopword supplied with a capital letter is still filtered."""
    cloud = WordCloud(stopwords=["Beautiful"])
    # Keep only the first field of each processed entry, which this test
    # treats as the word.
    seen_words = [entry[0] for entry in cloud.process_text(THIS)]
    assert_true("Beautiful" not in seen_words)
def test_writing_to_file(): def test_writing_to_file():
wc = WordCloud() wc = WordCloud()
wc.generate(THIS) wc.generate(THIS)
......
...@@ -79,6 +79,7 @@ def random_color_func(word=None, font_size=None, position=None, ...@@ -79,6 +79,7 @@ def random_color_func(word=None, font_size=None, position=None,
random_state = Random() random_state = Random()
return "hsl(%d, 80%%, 50%%)" % random_state.randint(0, 255) return "hsl(%d, 80%%, 50%%)" % random_state.randint(0, 255)
def get_single_color_func(color): def get_single_color_func(color):
"""Create a color function which returns a single hue and saturation with. """Create a color function which returns a single hue and saturation with.
different values (HSV). Accepted values are color strings as usable by PIL/Pillow. different values (HSV). Accepted values are color strings as usable by PIL/Pillow.
...@@ -88,7 +89,8 @@ def get_single_color_func(color): ...@@ -88,7 +89,8 @@ def get_single_color_func(color):
""" """
old_r, old_g, old_b = ImageColor.getrgb(color) old_r, old_g, old_b = ImageColor.getrgb(color)
rgb_max = 255. rgb_max = 255.
h, s, v = colorsys.rgb_to_hsv(old_r/rgb_max, old_g/rgb_max, old_b/rgb_max) h, s, v = colorsys.rgb_to_hsv(old_r / rgb_max, old_g / rgb_max, old_b / rgb_max)
def single_color_func(word=None, font_size=None, position=None, def single_color_func(word=None, font_size=None, position=None,
orientation=None, font_path=None, random_state=None): orientation=None, font_path=None, random_state=None):
"""Random color generation. """Random color generation.
...@@ -265,7 +267,7 @@ class WordCloud(object): ...@@ -265,7 +267,7 @@ class WordCloud(object):
# largest entry will be 1 # largest entry will be 1
max_frequency = float(frequencies[0][1]) max_frequency = float(frequencies[0][1])
frequencies = [ (word, freq / max_frequency) for word, freq in frequencies ] frequencies = [(word, freq / max_frequency) for word, freq in frequencies]
self.words_ = frequencies self.words_ = frequencies
...@@ -380,6 +382,8 @@ class WordCloud(object): ...@@ -380,6 +382,8 @@ class WordCloud(object):
include all those things. include all those things.
""" """
self.stopwords_lower_ = [word.lower() for word in self.stopwords]
d = {} d = {}
flags = (re.UNICODE if sys.version < '3' and type(text) is unicode flags = (re.UNICODE if sys.version < '3' and type(text) is unicode
else 0) else 0)
...@@ -388,7 +392,7 @@ class WordCloud(object): ...@@ -388,7 +392,7 @@ class WordCloud(object):
continue continue
word_lower = word.lower() word_lower = word.lower()
if word_lower in self.stopwords: if word_lower in self.stopwords_lower_:
continue continue
# Look in lowercase dict. # Look in lowercase dict.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment