diff --git a/newspaper/text.py b/newspaper/text.py
index 6ceb763..8c4d88a 100644
--- a/newspaper/text.py
+++ b/newspaper/text.py
@@ -75,7 +75,7 @@ class StopWords(object):
             content = content.encode('utf-8')
         return content.translate(self.TRANS_TABLE, string.punctuation)
 
-    def candiate_words(self, stripped_input):
+    def candidate_words(self, stripped_input):
         return stripped_input.split(' ')
 
     def get_stopword_count(self, content):
@@ -83,10 +83,10 @@ class StopWords(object):
             return WordStats()
         ws = WordStats()
         stripped_input = self.remove_punctuation(content)
-        candiate_words = self.candiate_words(stripped_input)
+        candidate_words = self.candidate_words(stripped_input)
         overlapping_stopwords = []
         c = 0
-        for w in candiate_words:
+        for w in candidate_words:
             c += 1
             if w.lower() in self.STOP_WORDS:
                 overlapping_stopwords.append(w.lower())
@@ -104,7 +104,7 @@ class StopWordsChinese(StopWords):
     def __init__(self, language='zh'):
         super(StopWordsChinese, self).__init__(language='zh')
 
-    def candiate_words(self, stripped_input):
+    def candidate_words(self, stripped_input):
         # jieba builds a tree that takes a while. avoid building
         # this tree if we don't use the chinese language
         from .packages import jieba
@@ -122,7 +122,7 @@ class StopWordsArabic(StopWords):
     def remove_punctuation(self, content):
         return content
 
-    def candiate_words(self, stripped_input):
+    def candidate_words(self, stripped_input):
         import nltk
         s = nltk.stem.isri.ISRIStemmer()
         words = []
@@ -143,10 +143,10 @@ class StopWordsKorean(StopWords):
             return WordStats()
         ws = WordStats()
         stripped_input = self.remove_punctuation(content)
-        candiate_words = self.candiate_words(stripped_input)
+        candidate_words = self.candidate_words(stripped_input)
         overlapping_stopwords = []
         c = 0
-        for w in candiate_words:
+        for w in candidate_words:
             c += 1
             for stop_word in self.STOP_WORDS:
                 overlapping_stopwords.append(stop_word)