felix 2017-02-24 17:53:48 +01:00
parent 331a60ebff
commit b78bb39ddc
4 changed files with 44 additions and 46 deletions

MANIFEST.in

@@ -1,4 +1,5 @@
 include newsplease/config/config.cfg
+include newsplease/config/config_lib.cfg
 include newsplease/config/sitelist.hjson
 include LICENSE.txt
 include README.md

(new file)

@@ -0,0 +1,42 @@
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.realpath(__file__)))
+
+from newsplease.pipeline.pipelines import InMemoryStorage
+from newsplease.single_crawler import SingleCrawler
+
+
+class NewsPleaseLib:
+    """
+    Access news-please functionality via this interface
+    """
+
+    @staticmethod
+    def download_article(url):
+        """
+        Crawls the article from the url and extracts relevant information.
+        :param url:
+        :return:
+        """
+        SingleCrawler.create_as_library(url)
+        results = InMemoryStorage.get_results()
+        article = results[url]
+        del results[url]
+        return article
+
+    @staticmethod
+    def download_articles(urls):
+        """
+        Crawls articles from the urls and extracts relevant information.
+        :param urls:
+        :return:
+        """
+        SingleCrawler.create_as_library(urls)
+        results = InMemoryStorage.get_results()
+        articles = []
+        for url in urls:
+            article = results[url]
+            del results[url]
+            articles.append(article)
+            print(article['title'])
+        return articles
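
For reference, a minimal usage sketch of the interface added above. The new module's path is not visible in this view, so the import below uses a placeholder name; the calls themselves follow the code in the hunk.

# Sketch only: 'newspleaselib' is a placeholder module name,
# since the new file's actual path is not shown in this diff.
from newspleaselib import NewsPleaseLib

# Crawl a single URL and get back the extracted article.
article = NewsPleaseLib.download_article(
    'http://www.zeit.de/politik/deutschland/2017-02/fluechtlinge-asylverfahren-bamf-taeuschung-afghanistan')
print(article['title'])

# Crawl several URLs in one run; results come back in input order.
articles = NewsPleaseLib.download_articles([
    'http://example.com/first',   # placeholder URLs
    'http://example.com/second',
])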

(deleted file)

@@ -1,45 +0,0 @@
-import sys
-import os
-sys.path.append(os.path.dirname(os.path.realpath(__file__)))
-
-from newsplease.pipeline.pipelines import InMemoryStorage
-from newsplease.single_crawler import SingleCrawler
-
-
-class NewsPleaseLib:
-    """
-    Access news-please functionality via this interface
-    """
-
-    @staticmethod
-    def download_article(url):
-        """
-        Crawls the article from the url and extracts relevant information.
-        :param url:
-        :return:
-        """
-        SingleCrawler.create_as_library(url)
-        results = InMemoryStorage.get_results()
-        article = results[url]
-        del results[url]
-        return article
-
-    @staticmethod
-    def download_articles(urls):
-        """
-        Crawls articles from the urls and extracts relevant information.
-        :param urls:
-        :return:
-        """
-        SingleCrawler.create_as_library(urls)
-        results = InMemoryStorage.get_results()
-        articles = []
-        for url in urls:
-            article = results[url]
-            del results[url]
-            articles.append(article)
-            print(article['title'])
-        return articles
-
-if __name__ == '__main__':
-    NewsPleaseLib.download_article('http://www.zeit.de/politik/deutschland/2017-02/fluechtlinge-asylverfahren-bamf-taeuschung-afghanistan')

setup.py

@@ -4,7 +4,7 @@ import sys, os
 setup(name='news-please',
-      version='1.0.25',
+      version='1.0.27',
       description="news-please is an open source, easy-to-use news extractor that just works.",
       long_description="""\
 news-please is an open source, easy-to-use news crawler that extracts structured information from almost any news website. It can recursively follow internal hyperlinks and read RSS feeds to fetch both the most recent and also old, archived articles. You only need to provide the root URL of the news website.""",