mirror of
https://github.com/codelucas/newspaper.git
synced 2025-12-23 05:36:50 +00:00
Minor Typo candiate_words -> candidate_words
This commit is contained in:
parent
58b15787b1
commit
616aa83f05
1 changed file with 7 additions and 7 deletions
|
|
@@ -75,7 +75,7 @@ class StopWords(object):
|
|||
content = content.encode('utf-8')
|
||||
return content.translate(self.TRANS_TABLE, string.punctuation)
|
||||
|
||||
- def candiate_words(self, stripped_input):
|
||||
+ def candidate_words(self, stripped_input):
|
||||
return stripped_input.split(' ')
|
||||
|
||||
def get_stopword_count(self, content):
|
||||
|
|
@@ -83,10 +83,10 @@ class StopWords(object):
|
|||
return WordStats()
|
||||
ws = WordStats()
|
||||
stripped_input = self.remove_punctuation(content)
|
||||
- candiate_words = self.candiate_words(stripped_input)
|
||||
+ candidate_words = self.candidate_words(stripped_input)
|
||||
overlapping_stopwords = []
|
||||
c = 0
|
||||
- for w in candiate_words:
|
||||
+ for w in candidate_words:
|
||||
c += 1
|
||||
if w.lower() in self.STOP_WORDS:
|
||||
overlapping_stopwords.append(w.lower())
|
||||
|
|
@@ -104,7 +104,7 @@ class StopWordsChinese(StopWords):
|
|||
def __init__(self, language='zh'):
|
||||
super(StopWordsChinese, self).__init__(language='zh')
|
||||
|
||||
- def candiate_words(self, stripped_input):
|
||||
+ def candidate_words(self, stripped_input):
|
||||
# jieba builds a tree that takes a while. avoid building
|
||||
# this tree if we don't use the chinese language
|
||||
from .packages import jieba
|
||||
|
|
@@ -122,7 +122,7 @@ class StopWordsArabic(StopWords):
|
|||
def remove_punctuation(self, content):
|
||||
return content
|
||||
|
||||
- def candiate_words(self, stripped_input):
|
||||
+ def candidate_words(self, stripped_input):
|
||||
import nltk
|
||||
s = nltk.stem.isri.ISRIStemmer()
|
||||
words = []
|
||||
|
|
@@ -143,10 +143,10 @@ class StopWordsKorean(StopWords):
|
|||
return WordStats()
|
||||
ws = WordStats()
|
||||
stripped_input = self.remove_punctuation(content)
|
||||
- candiate_words = self.candiate_words(stripped_input)
|
||||
+ candidate_words = self.candidate_words(stripped_input)
|
||||
overlapping_stopwords = []
|
||||
c = 0
|
||||
- for w in candiate_words:
|
||||
+ for w in candidate_words:
|
||||
c += 1
|
||||
for stop_word in self.STOP_WORDS:
|
||||
overlapping_stopwords.append(stop_word)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue