mirror of
https://github.com/codelucas/newspaper.git
synced 2025-12-23 05:36:50 +00:00
Merge 36b57399c0 into 90441f027e
This commit is contained in:
commit
0c65f7b4fa
2 changed files with 5 additions and 0 deletions
|
|
@@ -29,6 +29,9 @@ class DocumentCleaner(object):
|
|||
"|konafilter|KonaFilter|breadcrumbs|^fn$|wp-caption-text"
|
||||
"|legende|ajoutVideo|timestamp|js_replies"
|
||||
)
|
||||
# enable adding additional remove patterns through the config object
|
||||
if self.config.additional_remove_nodes_re:
|
||||
self.remove_nodes_re += '|' + self.config.additional_remove_nodes_re
|
||||
self.regexp_namespace = "http://exslt.org/regular-expressions"
|
||||
self.nauthy_ids_re = ("//*[re:test(@id, '%s', 'i')]" %
|
||||
self.remove_nodes_re)
|
||||
|
|
|
|||
|
|
@@ -73,6 +73,8 @@ class Configuration(object):
|
|||
|
||||
self.thread_timeout_seconds = 1
|
||||
self.ignored_content_types_defaults = {}
|
||||
|
||||
self.additional_remove_nodes_re = None
|
||||
# Set this to False if you want to recompute the categories
|
||||
# *every* time you build a `Source` object
|
||||
# TODO: Actually make this work
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue