From dd839cedfd9aac1163ce49c6ae275aaa6ab7c27b Mon Sep 17 00:00:00 2001
From: Adam Nelson <adam@factr.com>
Date: Thu, 7 Jul 2016 14:11:05 -0400
Subject: [PATCH 1/2] Reimplement parse_feeds() to grab titles of feeds

---
 newspaper/configuration.py |  4 ++--
 newspaper/extractors.py    |  4 ++--
 newspaper/source.py        | 26 +++++++++++---------------
 requirements.txt           |  1 -
 4 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/newspaper/configuration.py b/newspaper/configuration.py
index f93a532..f49c4ce 100644
--- a/newspaper/configuration.py
+++ b/newspaper/configuration.py
@@ -25,7 +25,7 @@ class Configuration(object):
     def __init__(self):
         """
         Modify any of these Article / Source properties
-        TODO: Have a seperate ArticleConfig and SourceConfig extend this!
+        TODO: Have a separate ArticleConfig and SourceConfig extend this!
         """
         self.MIN_WORD_COUNT = 300  # num of word tokens in text
         self.MIN_SENT_COUNT = 7    # num of sentence tokens
@@ -55,7 +55,7 @@ class Configuration(object):
         # You may keep the html of just the main article body
         self.keep_article_html = False
 
-        # Fail for error respones (e.g. 404 page)
+        # Fail for error responses (e.g. 404 page)
         self.http_success_only = True
 
         # English is the fallback
diff --git a/newspaper/extractors.py b/newspaper/extractors.py
index 0762905..f2cb14d 100644
--- a/newspaper/extractors.py
+++ b/newspaper/extractors.py
@@ -229,7 +229,7 @@ class ContentExtractor(object):
         - title tag is the most reliable (inherited from Goose)
         - h1, if properly detected, is the best (visible to users)
         - og:title and h1 can help improve the title extraction
-        - python == is too strict, often we need to compare fitlered
+        - python == is too strict, often we need to compare filtered
           versions, i.e. lowercase and ignoring special chars
 
         Explicit rules:
@@ -251,7 +251,7 @@ class ContentExtractor(object):
 
         # title from h1
         # - extract the longest text from all h1 elements
-        # - too short texts (less than 2 words) are discarded
+        # - too short texts (fewer than 2 words) are discarded
         # - clean double spaces
         title_text_h1 = ''
         title_element_h1_list = self.parser.getElementsByTag(doc, tag='h1') or []
diff --git a/newspaper/source.py b/newspaper/source.py
index adac3e6..ef0cbf5 100644
--- a/newspaper/source.py
+++ b/newspaper/source.py
@@ -98,7 +98,7 @@ class Source(object):
 
         self.set_feeds()
         self.download_feeds()       # mthread
-        # TODO: self.parse_feeds()  # regex for now
+        # self.parse_feeds()
 
         self.generate_articles()
 
@@ -204,21 +204,17 @@ class Source(object):
 
         self.categories = [c for c in self.categories if c.doc is not None]
 
-    def parse_feeds(self):
-        """DEPRECATED
-        Due to the slow speed of feedparser, we won't be dom parsing
-        our .rss feeds, but rather regex searching for urls in the .rss
-        text and then relying on our article logic to detect false urls.
-        """
-        for feed in self.feeds:
-            try:
-                feed.dom = feedparser.parse(feed.html)
-            except Exception as e:
-                log.critical('feedparser failed %s' % e)
-                if self.config.verbose:
-                    print('feed %s has failed parsing' % feed.url)
+    def _map_title_to_feed(self,feed):
+        doc = self.config.get_parser().fromstring(feed.rss)
+        feed.title = self.config.get_parser().getElementsByTag(doc, tag='title')[0].text or ''
+        return feed
 
-        self.feeds = [feed for feed in self.feeds if feed.dom is not None]
+    def parse_feeds(self):
+        """Add titles to feeds
+        """
+        log.debug('We are parsing %d feeds' %
+                  len(self.feeds))
+        self.feeds = [self._map_title_to_feed(f) for f in self.feeds]
 
     def feeds_to_articles(self):
         """Returns articles given the url of a feed
diff --git a/requirements.txt b/requirements.txt
index 588f5b5..b974150 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ cssselect>=0.9.1
 lxml>=3.3.5
 nltk>=3.2
 requests>=2.3.0
-feedparser>=5.1.3
+feedparser>=5.1.3
 tldextract>=1.5.1
 feedfinder2>=0.0.4
 jieba3k>=0.35.1

From 07871f8ce706844802edc19a3e5cbbfb291994bd Mon Sep 17 00:00:00 2001
From: Adam Nelson <adam@factr.com>
Date: Fri, 8 Jul 2016 15:10:04 -0400
Subject: [PATCH 2/2] source.parse_feeds() now adds titles to source.feeds

---
 .travis.yml                   |   1 -
 newspaper/__init__.py         |   5 +-
 newspaper/api.py              |   5 +-
 newspaper/configuration.py    |  19 ++++---
 newspaper/extractors.py       | 102 +++++++++++++++++-----------------
 newspaper/outputformatters.py |   4 +-
 newspaper/parsers.py          |   4 +-
 newspaper/source.py           |  36 +++---------
 8 files changed, 78 insertions(+), 98 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 28d9525..21e6e47 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,5 @@
 language: python
 python:
- - "3.3"
  - "3.4"
  - "3.5"
 install:
diff --git a/newspaper/__init__.py b/newspaper/__init__.py
index eaef885..3ce5aa4 100644
--- a/newspaper/__init__.py
+++ b/newspaper/__init__.py
@@ -7,9 +7,10 @@ __author__ = 'Lucas Ou-Yang'
 __license__ = 'MIT'
 __copyright__ = 'Copyright 2014, Lucas Ou-Yang'
 
-from .article import Article, ArticleException
 from .api import (build, build_article, fulltext, hot, languages,
-                  popular_urls, NewsPool, Configuration as Config)
+                  popular_urls, Configuration as Config)
+from .article import Article, ArticleException
+from .mthreading import NewsPool
 from .source import Source
 from .version import __version__
 
diff --git a/newspaper/api.py b/newspaper/api.py
index f69338f..fb98e81 100644
--- a/newspaper/api.py
+++ b/newspaper/api.py
@@ -13,13 +13,12 @@ import feedparser
 
 from .article import Article
 from .configuration import Configuration
-from .mthreading import NewsPool
 from .settings import POPULAR_URLS, TRENDING_URL
 from .source import Source
 from .utils import extend_config, print_available_languages
 
 
-def build(url='', dry=False, config=None, **kwargs):
+def build(url='', dry=False, config=None, **kwargs) -> Source:
     """Returns a constructed source object without
     downloading or parsing the articles
     """
@@ -32,7 +31,7 @@ def build(url='', dry=False, config=None, **kwargs):
     return s
 
 
-def build_article(url='', config=None, **kwargs):
+def build_article(url='', config=None, **kwargs) -> Article:
     """Returns a constructed article object without downloading
     or parsing
     """
diff --git a/newspaper/configuration.py b/newspaper/configuration.py
index f49c4ce..5a7b5ec 100644
--- a/newspaper/configuration.py
+++ b/newspaper/configuration.py
@@ -21,19 +21,18 @@ log = logging.getLogger(__name__)
 
 
 class Configuration(object):
-
     def __init__(self):
         """
         Modify any of these Article / Source properties
         TODO: Have a separate ArticleConfig and SourceConfig extend this!
         """
         self.MIN_WORD_COUNT = 300  # num of word tokens in text
-        self.MIN_SENT_COUNT = 7    # num of sentence tokens
-        self.MAX_TITLE = 200       # num of chars
-        self.MAX_TEXT = 100000     # num of chars
-        self.MAX_KEYWORDS = 35     # num of strings in list
-        self.MAX_AUTHORS = 10      # num strings in list
-        self.MAX_SUMMARY = 5000    # num of chars
+        self.MIN_SENT_COUNT = 7  # num of sentence tokens
+        self.MAX_TITLE = 200  # num of chars
+        self.MAX_TEXT = 100000  # num of chars
+        self.MAX_KEYWORDS = 35  # num of strings in list
+        self.MAX_AUTHORS = 10  # num strings in list
+        self.MAX_SUMMARY = 5000  # num of chars
         self.MAX_SUMMARY_SENT = 5  # num of sentences
 
         # max number of urls we cache for each news source
@@ -101,7 +100,8 @@ class Configuration(object):
     language = property(get_language, set_language,
                         del_language, "language prop")
 
-    def get_stopwords_class(self, language):
+    @staticmethod
+    def get_stopwords_class(language):
         if language == 'ko':
             return StopWordsKorean
         elif language == 'zh':
@@ -110,7 +110,8 @@ class Configuration(object):
             return StopWordsArabic
         return StopWords
 
-    def get_parser(self):
+    @staticmethod
+    def get_parser():
         return Parser
 
 
diff --git a/newspaper/extractors.py b/newspaper/extractors.py
index f2cb14d..0a763b9 100644
--- a/newspaper/extractors.py
+++ b/newspaper/extractors.py
@@ -11,17 +11,16 @@ __author__ = 'Lucas Ou-Yang'
 __license__ = 'MIT'
 __copyright__ = 'Copyright 2014, Lucas Ou-Yang'
 
-from collections import defaultdict
-import copy
-from dateutil.parser import parse as date_parser
 import logging
-import re
 import urllib.parse
+from collections import defaultdict
 
+import copy
+import re
+from dateutil.parser import parse as date_parser
 from tldextract import tldextract
 
 from . import urls
-
 from .utils import StringReplacement, StringSplitter
 
 log = logging.getLogger(__name__)
@@ -53,7 +52,6 @@ bad_domains = ['amazon', 'doubleclick', 'twitter']
 
 
 class ContentExtractor(object):
-
     def __init__(self, config):
         self.config = config
         self.parser = self.config.get_parser()
@@ -61,10 +59,10 @@ class ContentExtractor(object):
         self.stopwords_class = config.stopwords_class
 
     def update_language(self, meta_lang):
-        '''Required to be called before the extraction process in some
+        """Required to be called before the extraction process in some
         cases because the stopwords_class has to set incase the lang
         is not latin based
-        '''
+        """
         if meta_lang:
             self.language = meta_lang
             self.stopwords_class = \
@@ -93,10 +91,10 @@ class ContentExtractor(object):
             return result
 
         def parse_byline(search_str):
-            """Takes a candidate line of html or text and
-            extracts out the name(s) in list form
-            >>> search_str('<div>By: <strong>Lucas Ou-Yang</strong>, \
-                            <strong>Alex Smith</strong></div>')
+            """
+            Takes a candidate line of html or text and
+            extracts out the name(s) in list form:
+            >>> parse_byline('<div>By: <strong>Lucas Ou-Yang</strong>,<strong>Alex Smith</strong></div>')
             ['Lucas Ou-Yang', 'Alex Smith']
             """
             # Remove HTML boilerplate
@@ -116,10 +114,10 @@ class ContentExtractor(object):
             _authors = []
             # List of first, last name tokens
             curname = []
-            DELIM = ['and', ',', '']
+            delimiters = ['and', ',', '']
 
             for token in name_tokens:
-                if token in DELIM:
+                if token in delimiters:
                     if len(curname) > 0:
                         _authors.append(' '.join(curname))
                         curname = []
@@ -184,7 +182,7 @@ class ContentExtractor(object):
             try:
                 datetime_obj = date_parser(date_str)
                 return datetime_obj
-            except:
+            except (ValueError, OverflowError):
                 # near all parse failures are due to URL dates without a day
                 # specifier, e.g. /2014/04/
                 return None
@@ -341,8 +339,8 @@ class ContentExtractor(object):
             hint = filter_regex.sub('', hint).lower()
 
         # find the largest title piece
-        for i in range(len(title_pieces)):
-            current = title_pieces[i].strip()
+        for i, title_piece in enumerate(title_pieces):
+            current = title_piece.strip()
             if hint and hint in filter_regex.sub('', current).lower():
                 large_text_index = i
                 break
@@ -393,7 +391,7 @@ class ContentExtractor(object):
             # look up for a Content-Language in meta
             items = [
                 {'tag': 'meta', 'attr': 'http-equiv',
-                    'value': 'content-language'},
+                 'value': 'content-language'},
                 {'tag': 'meta', 'attr': 'name', 'value': 'lang'}
             ]
             for item in items:
@@ -409,14 +407,14 @@ class ContentExtractor(object):
 
         return None
 
-    def get_meta_content(self, doc, metaName):
+    def get_meta_content(self, doc, metaname):
         """Extract a given meta content form document.
         Example metaNames:
             "meta[name=description]"
             "meta[name=keywords]"
             "meta[property=og:type]"
         """
-        meta = self.parser.css_select(doc, metaName)
+        meta = self.parser.css_select(doc, metaname)
         content = None
         if meta is not None and len(meta) > 0:
             content = self.parser.getAttribute(meta[0], 'content')
@@ -540,7 +538,7 @@ class ContentExtractor(object):
         urls = [img_tag.get('src')
                 for img_tag in img_tags if img_tag.get('src')]
         img_links = set([urllib.parse.urljoin(article_url, url)
-                        for url in urls])
+                         for url in urls])
         return img_links
 
     def get_first_img_url(self, article_url, top_node):
@@ -747,7 +745,7 @@ class ContentExtractor(object):
 
         for node in nodes_to_check:
             text_node = self.parser.getText(node)
-            word_stats = self.stopwords_class(language=self.language).\
+            word_stats = self.stopwords_class(language=self.language). \
                 get_stopword_count(text_node)
             high_link_density = self.is_highlink_density(node)
             if word_stats.get_stopword_count() > 2 and not high_link_density:
@@ -760,7 +758,7 @@ class ContentExtractor(object):
         for node in nodes_with_text:
             boost_score = float(0)
             # boost
-            if(self.is_boostable(node)):
+            if self.is_boostable(node):
                 if cnt >= 0:
                     boost_score = float((1.0 / starting_boost) * 50)
                     starting_boost += 1
@@ -775,7 +773,7 @@ class ContentExtractor(object):
                         boost_score = float(5)
 
             text_node = self.parser.getText(node)
-            word_stats = self.stopwords_class(language=self.language).\
+            word_stats = self.stopwords_class(language=self.language). \
                 get_stopword_count(text_node)
             upscore = int(word_stats.get_stopword_count() + boost_score)
 
@@ -827,9 +825,9 @@ class ContentExtractor(object):
             if current_node_tag == para:
                 if steps_away >= max_stepsaway_from_node:
                     return False
-                paraText = self.parser.getText(current_node)
-                word_stats = self.stopwords_class(language=self.language).\
-                    get_stopword_count(paraText)
+                paragraph_text = self.parser.getText(current_node)
+                word_stats = self.stopwords_class(language=self.language). \
+                    get_stopword_count(paragraph_text)
                 if word_stats.get_stopword_count() > minimum_stopword_count:
                     return True
                 steps_away += 1
@@ -844,21 +842,21 @@ class ContentExtractor(object):
         return b
 
     def add_siblings(self, top_node):
-        baselinescore_siblings_para = self.get_siblings_score(top_node)
+        baseline_score_siblings_para = self.get_siblings_score(top_node)
         results = self.walk_siblings(top_node)
         for current_node in results:
             ps = self.get_siblings_content(
-                current_node, baselinescore_siblings_para)
+                current_node, baseline_score_siblings_para)
             for p in ps:
                 top_node.insert(0, p)
         return top_node
 
     def get_siblings_content(
-            self, current_sibling, baselinescore_siblings_para):
+            self, current_sibling, baseline_score_siblings_para):
         """Adds any siblings that may have a decent score to this node
         """
         if current_sibling.tag == 'p' and \
-                len(self.parser.getText(current_sibling)) > 0:
+                        len(self.parser.getText(current_sibling)) > 0:
             e0 = current_sibling
             if e0.tail:
                 e0 = copy.deepcopy(e0)
@@ -874,13 +872,13 @@ class ContentExtractor(object):
                 for first_paragraph in potential_paragraphs:
                     text = self.parser.getText(first_paragraph)
                     if len(text) > 0:
-                        word_stats = self.stopwords_class(language=self.language).\
+                        word_stats = self.stopwords_class(language=self.language). \
                             get_stopword_count(text)
                         paragraph_score = word_stats.get_stopword_count()
                         sibling_baseline_score = float(.30)
                         high_link_density = self.is_highlink_density(
                             first_paragraph)
-                        score = float(baselinescore_siblings_para *
+                        score = float(baseline_score_siblings_para *
                                       sibling_baseline_score)
                         if score < paragraph_score and not high_link_density:
                             p = self.parser.createElement(
@@ -904,7 +902,7 @@ class ContentExtractor(object):
 
         for node in nodes_to_check:
             text_node = self.parser.getText(node)
-            word_stats = self.stopwords_class(language=self.language).\
+            word_stats = self.stopwords_class(language=self.language). \
                 get_stopword_count(text_node)
             high_link_density = self.is_highlink_density(node)
             if word_stats.get_stopword_count() > 2 and not high_link_density:
@@ -916,7 +914,7 @@ class ContentExtractor(object):
 
         return base
 
-    def update_score(self, node, addToScore):
+    def update_score(self, node, add_to_score):
         """Adds a score to the gravityScore Attribute we put on divs
         we'll get the current score then add the score we're passing
         in to the current.
@@ -926,7 +924,7 @@ class ContentExtractor(object):
         if score_string:
             current_score = float(score_string)
 
-        new_score = current_score + addToScore
+        new_score = current_score + add_to_score
         self.parser.setAttribute(node, "gravityScore", str(new_score))
 
     def update_node_count(self, node, add_to_count):
@@ -957,12 +955,12 @@ class ContentExtractor(object):
         for link in links:
             sb.append(self.parser.getText(link))
 
-        linkText = ''.join(sb)
-        linkWords = linkText.split()
-        numberOfLinkWords = float(len(linkWords))
-        numberOfLinks = float(len(links))
-        linkDivisor = float(numberOfLinkWords / words_number)
-        score = float(linkDivisor * numberOfLinks)
+        link_text = ''.join(sb)
+        link_words = link_text.split()
+        num_link_words = float(len(link_words))
+        num_links = float(len(links))
+        link_divisor = float(num_link_words / words_number)
+        score = float(link_divisor * num_links)
         if score >= 1.0:
             return True
         return False
@@ -974,10 +972,10 @@ class ContentExtractor(object):
         return self.get_node_gravity_score(node) or 0
 
     def get_node_gravity_score(self, node):
-        grvScoreString = self.parser.getAttribute(node, 'gravityScore')
-        if not grvScoreString:
+        gravity_score = self.parser.getAttribute(node, 'gravityScore')
+        if not gravity_score:
             return None
-        return float(grvScoreString)
+        return float(gravity_score)
 
     def nodes_to_check(self, doc):
         """Returns a list of nodes we want to search
@@ -990,23 +988,23 @@ class ContentExtractor(object):
         return nodes_to_check
 
     def is_table_and_no_para_exist(self, e):
-        subParagraphs = self.parser.getElementsByTag(e, tag='p')
-        for p in subParagraphs:
+        sub_paragraphs = self.parser.getElementsByTag(e, tag='p')
+        for p in sub_paragraphs:
             txt = self.parser.getText(p)
             if len(txt) < 25:
                 self.parser.remove(p)
 
-        subParagraphs2 = self.parser.getElementsByTag(e, tag='p')
-        if len(subParagraphs2) == 0 and e.tag != "td":
+        sub_paragraphs_2 = self.parser.getElementsByTag(e, tag='p')
+        if len(sub_paragraphs_2) == 0 and e.tag != "td":
             return True
         return False
 
     def is_nodescore_threshold_met(self, node, e):
         top_node_score = self.get_score(node)
-        current_nodeScore = self.get_score(e)
-        thresholdScore = float(top_node_score * .08)
+        current_node_score = self.get_score(e)
+        threshold = float(top_node_score * .08)
 
-        if (current_nodeScore < thresholdScore) and e.tag != 'td':
+        if (current_node_score < threshold) and e.tag != 'td':
             return False
         return True
 
diff --git a/newspaper/outputformatters.py b/newspaper/outputformatters.py
index 02b8d22..7e7e7e8 100644
--- a/newspaper/outputformatters.py
+++ b/newspaper/outputformatters.py
@@ -7,7 +7,7 @@ __author__ = 'Lucas Ou-Yang'
 __license__ = 'MIT'
 __copyright__ = 'Copyright 2014, Lucas Ou-Yang'
 
-from html.parser import HTMLParser
+from html import unescape
 import logging
 
 from .text import innerTrim
@@ -70,7 +70,7 @@ class OutputFormatter(object):
                 txt = None
 
             if txt:
-                txt = HTMLParser().unescape(txt)
+                txt = unescape(txt)
                 txt_lis = innerTrim(txt).split(r'\n')
                 txt_lis = [n.strip(' ') for n in txt_lis]
                 txts.extend(txt_lis)
diff --git a/newspaper/parsers.py b/newspaper/parsers.py
index f4c1847..3483fb7 100644
--- a/newspaper/parsers.py
+++ b/newspaper/parsers.py
@@ -12,7 +12,7 @@ import lxml.html
 import lxml.html.clean
 import re
 import traceback
-from html.parser import HTMLParser
+from html import unescape
 
 from bs4 import UnicodeDammit
 from copy import deepcopy
@@ -247,7 +247,7 @@ class Parser(object):
         if attr:
             attr = node.attrib.get(attr, None)
         if attr:
-            attr = HTMLParser().unescape(attr)
+            attr = unescape(attr)
         return attr
 
     @classmethod
diff --git a/newspaper/source.py b/newspaper/source.py
index ef0cbf5..545fe48 100644
--- a/newspaper/source.py
+++ b/newspaper/source.py
@@ -10,25 +10,20 @@ __copyright__ = 'Copyright 2014, Lucas Ou-Yang'
 
 import logging
 
-import feedparser
-
 from tldextract import tldextract
 
 from . import network
 from . import urls
 from . import utils
-
 from .article import Article
-from .extractors import ContentExtractor
 from .configuration import Configuration
+from .extractors import ContentExtractor
 from .settings import ANCHOR_DIRECTORY
 
-
 log = logging.getLogger(__name__)
 
 
 class Category(object):
-
     def __init__(self, url):
         self.url = url
         self.html = None
@@ -36,7 +31,6 @@ class Category(object):
 
 
 class Feed(object):
-
     def __init__(self, url):
         self.url = url
         self.rss = None
@@ -52,6 +46,7 @@ class Source(object):
     articles   =  [<article obj>, <article obj>, ..]
     brand      =  'cnn'
     """
+
     def __init__(self, url, config=None, **kwargs):
         """The config object for this source will be passed into all of this
         source's children articles unless specified otherwise or re-set.
@@ -85,7 +80,7 @@ class Source(object):
         self.is_parsed = False
         self.is_downloaded = False
 
-    def build(self, response=None):
+    def build(self):
         """Encapsulates download and basic parsing with lxml. May be a
         good idea to split this into download() and parse() methods.
         """
@@ -97,7 +92,7 @@ class Source(object):
         self.parse_categories()
 
         self.set_feeds()
-        self.download_feeds()       # mthread
+        self.download_feeds()  # mthread
         # self.parse_feeds()
 
         self.generate_articles()
@@ -118,7 +113,7 @@ class Source(object):
         return articles
 
     @utils.cache_disk(seconds=(86400 * 1), cache_folder=ANCHOR_DIRECTORY)
-    def _get_category_urls(self, domain):
+    def _get_category_urls(self, domain):
         """The domain param is **necessary**, see .utils.cache_disk for reasons.
         the boilerplate method is so we can use this decorator right.
         We are caching categories for 1 day.
@@ -204,9 +199,10 @@ class Source(object):
 
         self.categories = [c for c in self.categories if c.doc is not None]
 
-    def _map_title_to_feed(self,feed):
+    def _map_title_to_feed(self, feed):
         doc = self.config.get_parser().fromstring(feed.rss)
-        feed.title = self.config.get_parser().getElementsByTag(doc, tag='title')[0].text or ''
+        titles = self.config.get_parser().getElementsByTag(doc, tag='title')
+        feed.title = (titles[0].text if titles else None) or self.brand
         return feed
 
     def parse_feeds(self):
@@ -243,7 +239,7 @@ class Source(object):
 
             if self.config.verbose:
                 print(('%d->%d->%d for %s' %
-                      (before_purge, after_purge, after_memo, feed.url)))
+                       (before_purge, after_purge, after_memo, feed.url)))
             log.debug('%d->%d->%d for %s' %
                       (before_purge, after_purge, after_memo, feed.url))
         return articles
@@ -374,20 +370,6 @@ class Source(object):
         """
         return [article.url for article in self.articles]
 
-    def get_key(self):
-        # TODO
-        pass
-
-    def clear_anchor_directory(self):
-        """Clears out all files in our directory where we cache anchors
-        the key is sha1(self.domain).hexdigest() fn is ANCHOR_DIR/key.
-        """
-        pass
-        # TODO:
-        # d_pth = os.path.join(
-        #   settings.MEMO_DIR, domain_to_filename(source_domain))
-        # os.path.remove(ANCHOR_DIRECTORY)
-
     def print_summary(self):
         """Prints out a summary of the data in our source instance
         """