mirror of
https://github.com/codelucas/newspaper.git
synced 2025-12-23 05:36:50 +00:00
added complete test cases for config setting
This commit is contained in:
parent
e70511e002
commit
15bbd8a9db
5 changed files with 42 additions and 37 deletions
|
|
@ -13,32 +13,34 @@ from .settings import POPULAR_URLS, TRENDING_URL
|
|||
from .configuration import Configuration
|
||||
from .mthreading import NewsPool
|
||||
from .configuration import Configuration
|
||||
from .utils import print_available_languages
|
||||
from .utils import print_available_languages, extend_config
|
||||
|
||||
def build(url=u'', config=None):
|
||||
def build(url=u'', dry=False, config=None, **kwargs):
|
||||
"""
|
||||
Returns a constructed source object without
|
||||
downloading or parsing the articles.
|
||||
"""
|
||||
config = config or Configuration() # Order matters
|
||||
url = url or '' # Empty string precedence over None
|
||||
valid_href = ('://' in url) and (url[:4] == 'http')
|
||||
config = extend_config(config, kwargs)
|
||||
|
||||
if not valid_href:
|
||||
print 'ERR: provide a valid url'
|
||||
return None
|
||||
url = url or ''
|
||||
s = Source(url, config=config)
|
||||
|
||||
s = Source(url, config)
|
||||
s.build()
|
||||
# dry means we are just testing, don't actually build source
|
||||
if not dry:
|
||||
s.build()
|
||||
return s
|
||||
|
||||
def build_article(url=u''):
|
||||
def build_article(url=u'', config=None, **kwargs):
|
||||
"""
|
||||
Returns a constructed article object without
|
||||
downloading or parsing.
|
||||
"""
|
||||
url = url or '' # empty string precedence over None
|
||||
a = Article(url)
|
||||
config = config or Configuration() # Order matters
|
||||
config = extend_config(config, kwargs)
|
||||
|
||||
url = url or ''
|
||||
a = Article(url, config=config)
|
||||
return a
|
||||
|
||||
def languages():
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ from . import network
|
|||
from . import settings
|
||||
from .configuration import Configuration
|
||||
from .extractors import StandardContentExtractor
|
||||
from .utils import URLHelper, encodeValue, RawHelper
|
||||
from .utils import URLHelper, encodeValue, RawHelper, extend_config
|
||||
from .cleaners import StandardDocumentCleaner
|
||||
from .outputformatters import StandardOutputFormatter
|
||||
from .videos.extractors import VideoExtractor
|
||||
|
|
@ -33,23 +33,13 @@ class ArticleException(Exception):
|
|||
class Article(object):
|
||||
"""
|
||||
"""
|
||||
def extend_config(self, config_items):
|
||||
"""
|
||||
We are handling config value setting like this for a cleaner api.
|
||||
Users just need to pass in a named param to this article and we
|
||||
can dynamically set a config object for it.
|
||||
"""
|
||||
for key, val in config_items.items():
|
||||
if hasattr(self.config, key):
|
||||
setattr(self.config, key, val)
|
||||
|
||||
def __init__(self, url, title=u'', source_url=u'', config=None, **kwargs):
|
||||
"""
|
||||
The **kwargs argument can be filled with config values which we then
|
||||
push in.
|
||||
"""
|
||||
self.config = config or Configuration()
|
||||
self.extend_config(kwargs)
|
||||
self.config = extend_config(self.config, kwargs)
|
||||
|
||||
self.parser = self.config.get_parser()
|
||||
self.extractor = StandardContentExtractor(config=self.config)
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ from .extractors import StandardContentExtractor
|
|||
from .urls import (
|
||||
get_domain, get_scheme, prepare_url)
|
||||
from .utils import (
|
||||
memoize_articles, cache_disk, clear_memo_cache, encodeValue)
|
||||
memoize_articles, cache_disk, clear_memo_cache, encodeValue, extend_config)
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -52,16 +52,6 @@ class Source(object):
|
|||
articles = [<article obj>, <article obj>, ..]
|
||||
brand = 'cnn'
|
||||
"""
|
||||
def extend_config(self, config_items):
|
||||
"""
|
||||
We are handling config value setting like this for a cleaner api.
|
||||
Users just need to pass in a named param to this source and we can
|
||||
dynamically generate a config object for it.
|
||||
"""
|
||||
for key, val in config_items.items():
|
||||
if hasattr(self.config, key):
|
||||
setattr(self.config, key, val)
|
||||
|
||||
def __init__(self, url, config=None, **kwargs):
|
||||
"""
|
||||
**The config object for this source will be passed into all of this
|
||||
|
|
@ -71,7 +61,7 @@ class Source(object):
|
|||
raise Exception('Input url is bad!')
|
||||
|
||||
self.config = config or Configuration() # Order matters
|
||||
self.extend_config(kwargs)
|
||||
self.config = extend_config(self.config, kwargs)
|
||||
|
||||
self.parser = self.config.get_parser()
|
||||
self.extractor = StandardContentExtractor(config=self.config)
|
||||
|
|
|
|||
|
|
@ -369,4 +369,15 @@ def print_available_languages():
|
|||
print ' %s\t\t\t %s' % (code, language_dict[code])
|
||||
print
|
||||
|
||||
def extend_config(config, config_items):
|
||||
"""
|
||||
We are handling config value setting like this for a cleaner api.
|
||||
Users just need to pass in a named param to this source and we can
|
||||
dynamically generate a config object for it.
|
||||
"""
|
||||
for key, val in config_items.items():
|
||||
if hasattr(config, key):
|
||||
setattr(config, key, val)
|
||||
|
||||
return config
|
||||
|
||||
|
|
|
|||
|
|
@ -232,7 +232,7 @@ class APITestCase(unittest.TestCase):
|
|||
|
||||
@print_test
|
||||
def test_source_build(self):
|
||||
huff_paper = newspaper.build('http://www.huffingtonpost.com/')
|
||||
huff_paper = newspaper.build('http://www.huffingtonpost.com/', dry=True)
|
||||
assert isinstance(huff_paper, Source) == True
|
||||
|
||||
@print_test
|
||||
|
|
@ -346,6 +346,18 @@ class ConfigBuildTestCase(unittest.TestCase):
|
|||
assert s.config.language == 'en'
|
||||
assert s.config.use_meta_language == False
|
||||
|
||||
s = newspaper.build('http://cnn.com', dry=True)
|
||||
assert s.config.language == 'en'
|
||||
assert s.config.MAX_FILE_MEMO == 20000
|
||||
assert s.config.memoize_articles == True
|
||||
assert s.config.use_meta_language == True
|
||||
|
||||
s = newspaper.build('http://cnn.com', dry=True, memoize_articles=False,
|
||||
MAX_FILE_MEMO=10000, language='zh')
|
||||
assert s.config.language == 'zh'
|
||||
assert s.config.MAX_FILE_MEMO == 10000
|
||||
assert s.config.memoize_articles == False
|
||||
assert s.config.use_meta_language == False
|
||||
|
||||
class MultiLanguageTestCase(unittest.TestCase):
|
||||
def runTest(self):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue