added complete test cases for config setting

This commit is contained in:
Lucas Ou-Yang 2014-01-09 00:28:14 -08:00
parent e70511e002
commit 15bbd8a9db
5 changed files with 42 additions and 37 deletions

View file

@ -13,32 +13,34 @@ from .settings import POPULAR_URLS, TRENDING_URL
from .configuration import Configuration
from .mthreading import NewsPool
from .configuration import Configuration
from .utils import print_available_languages
from .utils import print_available_languages, extend_config
def build(url=u'', config=None):
def build(url=u'', dry=False, config=None, **kwargs):
"""
Returns a constructed source object without
downloading or parsing the articles.
"""
config = config or Configuration() # Order matters
url = url or '' # Empty string precedence over None
valid_href = ('://' in url) and (url[:4] == 'http')
config = extend_config(config, kwargs)
if not valid_href:
print 'ERR: provide a valid url'
return None
url = url or ''
s = Source(url, config=config)
s = Source(url, config)
s.build()
# dry means we are just testing, don't actually build source
if not dry:
s.build()
return s
def build_article(url=u''):
def build_article(url=u'', config=None, **kwargs):
"""
Returns a constructed article object without
downloading or parsing.
"""
url = url or '' # empty string precedence over None
a = Article(url)
config = config or Configuration() # Order matters
config = extend_config(config, kwargs)
url = url or ''
a = Article(url, config=config)
return a
def languages():

View file

@ -18,7 +18,7 @@ from . import network
from . import settings
from .configuration import Configuration
from .extractors import StandardContentExtractor
from .utils import URLHelper, encodeValue, RawHelper
from .utils import URLHelper, encodeValue, RawHelper, extend_config
from .cleaners import StandardDocumentCleaner
from .outputformatters import StandardOutputFormatter
from .videos.extractors import VideoExtractor
@ -33,23 +33,13 @@ class ArticleException(Exception):
class Article(object):
"""
"""
def extend_config(self, config_items):
    """
    Apply user supplied overrides to this article's config object.

    We are handling config value setting like this for a cleaner api.
    Users just need to pass in a named param to this article and the
    matching attribute on self.config is updated.

    config_items maps option names to values. Only names that already
    exist as attributes on self.config are applied; every other key is
    silently skipped. self.config is modified in place and nothing is
    returned. None or an empty mapping leaves self.config untouched.
    """
    # Tolerate None so a caller without overrides does not have to
    # build an empty dict first.
    if not config_items:
        return
    for key, val in config_items.items():
        # hasattr filters out names the config does not define, so a
        # misspelled option cannot add a stray attribute.
        if hasattr(self.config, key):
            setattr(self.config, key, val)
def __init__(self, url, title=u'', source_url=u'', config=None, **kwargs):
"""
The **kwargs argument can be filled with config values, which we then
push into the config object.
"""
self.config = config or Configuration()
self.extend_config(kwargs)
self.config = extend_config(self.config, kwargs)
self.parser = self.config.get_parser()
self.extractor = StandardContentExtractor(config=self.config)

View file

@ -20,7 +20,7 @@ from .extractors import StandardContentExtractor
from .urls import (
get_domain, get_scheme, prepare_url)
from .utils import (
memoize_articles, cache_disk, clear_memo_cache, encodeValue)
memoize_articles, cache_disk, clear_memo_cache, encodeValue, extend_config)
log = logging.getLogger(__name__)
@ -52,16 +52,6 @@ class Source(object):
articles = [<article obj>, <article obj>, ..]
brand = 'cnn'
"""
def extend_config(self, config_items):
    """
    Apply user supplied overrides to this source's config object.

    We are handling config value setting like this for a cleaner api.
    Users just need to pass in a named param to this source and the
    matching attribute on self.config is updated. No new config object
    is created here; the existing one is changed in place.

    config_items maps option names to values. Only names that already
    exist as attributes on self.config are applied; every other key is
    silently skipped. Nothing is returned. None or an empty mapping
    leaves self.config untouched.
    """
    # Tolerate None so a caller without overrides does not have to
    # build an empty dict first.
    if not config_items:
        return
    for key, val in config_items.items():
        # hasattr filters out names the config does not define, so a
        # misspelled option cannot add a stray attribute.
        if hasattr(self.config, key):
            setattr(self.config, key, val)
def __init__(self, url, config=None, **kwargs):
"""
**The config object for this source will be passed into all of this
@ -71,7 +61,7 @@ class Source(object):
raise Exception('Input url is bad!')
self.config = config or Configuration() # Order matters
self.extend_config(kwargs)
self.config = extend_config(self.config, kwargs)
self.parser = self.config.get_parser()
self.extractor = StandardContentExtractor(config=self.config)

View file

@ -369,4 +369,15 @@ def print_available_languages():
print ' %s\t\t\t %s' % (code, language_dict[code])
print
def extend_config(config, config_items):
    """
    Copy matching entries of config_items onto an existing config object.

    We are handling config value setting like this for a cleaner api.
    Users just need to pass in named params (collected by the caller,
    e.g. as **kwargs) and they are applied to the config object here.

    Only keys that already exist as attributes on config are applied;
    every other key is silently skipped. config is modified in place
    and the very same object is returned, so the result can be assigned
    back by the caller. None or an empty mapping leaves config untouched.
    """
    # Tolerate None so a caller without overrides does not have to
    # build an empty dict first.
    if not config_items:
        return config
    for key, val in config_items.items():
        # hasattr filters out names the config does not define, so a
        # misspelled option cannot add a stray attribute.
        if hasattr(config, key):
            setattr(config, key, val)
    return config

View file

@ -232,7 +232,7 @@ class APITestCase(unittest.TestCase):
@print_test
def test_source_build(self):
huff_paper = newspaper.build('http://www.huffingtonpost.com/')
huff_paper = newspaper.build('http://www.huffingtonpost.com/', dry=True)
assert isinstance(huff_paper, Source) == True
@print_test
@ -346,6 +346,18 @@ class ConfigBuildTestCase(unittest.TestCase):
assert s.config.language == 'en'
assert s.config.use_meta_language == False
s = newspaper.build('http://cnn.com', dry=True)
assert s.config.language == 'en'
assert s.config.MAX_FILE_MEMO == 20000
assert s.config.memoize_articles == True
assert s.config.use_meta_language == True
s = newspaper.build('http://cnn.com', dry=True, memoize_articles=False,
MAX_FILE_MEMO=10000, language='zh')
assert s.config.language == 'zh'
assert s.config.MAX_FILE_MEMO == 10000
assert s.config.memoize_articles == False
assert s.config.use_meta_language == False
class MultiLanguageTestCase(unittest.TestCase):
def runTest(self):