Minor Typo candiate_words -> candidate_words

This commit is contained in:
Sandeep 2014-01-20 10:16:41 +05:30
parent 58b15787b1
commit 616aa83f05

View file

@@ -75,7 +75,7 @@ class StopWords(object):
content = content.encode('utf-8')
return content.translate(self.TRANS_TABLE, string.punctuation)
def candiate_words(self, stripped_input):
def candidate_words(self, stripped_input):
return stripped_input.split(' ')
def get_stopword_count(self, content):
@@ -83,10 +83,10 @@ class StopWords(object):
return WordStats()
ws = WordStats()
stripped_input = self.remove_punctuation(content)
candiate_words = self.candiate_words(stripped_input)
candidate_words = self.candidate_words(stripped_input)
overlapping_stopwords = []
c = 0
for w in candiate_words:
for w in candidate_words:
c += 1
if w.lower() in self.STOP_WORDS:
overlapping_stopwords.append(w.lower())
@@ -104,7 +104,7 @@ class StopWordsChinese(StopWords):
def __init__(self, language='zh'):
super(StopWordsChinese, self).__init__(language='zh')
def candiate_words(self, stripped_input):
def candidate_words(self, stripped_input):
# jieba builds a tree that takes a while. avoid building
# this tree if we don't use the chinese language
from .packages import jieba
@@ -122,7 +122,7 @@ class StopWordsArabic(StopWords):
def remove_punctuation(self, content):
return content
def candiate_words(self, stripped_input):
def candidate_words(self, stripped_input):
import nltk
s = nltk.stem.isri.ISRIStemmer()
words = []
@@ -143,10 +143,10 @@ class StopWordsKorean(StopWords):
return WordStats()
ws = WordStats()
stripped_input = self.remove_punctuation(content)
candiate_words = self.candiate_words(stripped_input)
candidate_words = self.candidate_words(stripped_input)
overlapping_stopwords = []
c = 0
for w in candiate_words:
for w in candidate_words:
c += 1
for stop_word in self.STOP_WORDS:
overlapping_stopwords.append(stop_word)